146 ld.param.u32 %r22, [_Z11kmeansPointPfiiiPiS_S_S0__param_1];
147 ld.param.u32 %r23, [_Z11kmeansPointPfiiiPiS_S_S0__param_2];
148 ld.param.u32 %r24, [_Z11kmeansPointPfiiiPiS_S_S0__param_3];
149 ld.param.u64 %rd5, [_Z11kmeansPointPfiiiPiS_S_S0__param_4];
150 mov.u32 %r26, %nctaid.x;
151 mov.u32 %r27, %ctaid.y;
152 mov.u32 %r28, %ctaid.x;
153 mad.lo.s32 %r29, %r27, %r26, %r28;
154 mov.u32 %r30, %ntid.x;
155 mul.lo.s32 %r1, %r29, %r30;
156 mov.u32 %r2, %ntid.y;
157 mov.u32 %r3, %tid.x;
158 mad.lo.s32 %r4, %r1, %r2, %r3;
159 setp.lt.u32%p1, %r4, %r23;
160 setp.gt.s32%p2, %r24, 0;
161 and.pred %p3, %p1, %p2;
162 mov.u32 %r55, -1;
163 @!%p3 bra BB1_13;
164 bra.uni BB1_1;
167 mov.u32 %r55, -1;
168 mov.u32 %r48, 0;
169 mov.f32 %f62, 0f7F7FFFFF;
172 mul.lo.s32 %r8, %r48, %r22;
173 mov.f32 %f67, 0f00000000;
174 setp.lt.s32%p4, %r22, 1;
175 @%p4 bra BB1_12;
177 and.b32 %r36, %r22, 3;
178 mov.u32 %r52, 0;
179 mov.f32 %f67, 0f00000000;
180 setp.eq.s32%p5, %r36, 0;
181 @%p5 bra BB1_9;
183 setp.eq.s32%p6, %r36, 1;
184 @%p6 bra BB1_8;
186 setp.eq.s32%p7, %r36, 2;
187 @%p7 bra BB1_7;
199 neg.s32 %r38, %r52;
200 and.b32 %r39, %r38, %r23;
201 add.s32 %r40, %r39, %r4;
202 tex.1d.v4.f32.s32{%f23, %f24, %f25, %f26}, [t_features, {%r40}];
203 add.s32 %r41, %r52, %r8;
204 mul.wide.s32 %rd9, %r41, 4;
205 mov.u64 %rd10, c_clusters;
206 add.s64 %rd11, %rd10, %rd9;
207 ld.const.f32 %f27, [%rd11];
208 sub.f32 %f28, %f23, %f27;
209 fma.rn.f32 %f67, %f28, %f28, %f67;
210 add.s32 %r52, %r52, 1;
213 mad.lo.s32 %r42, %r52, %r23, %r4;
214 tex.1d.v4.f32.s32{%f29, %f30, %f31, %f32}, [t_features, {%r42}];
215 add.s32 %r43, %r52, %r8;
216 mul.wide.s32 %rd12, %r43, 4;
217 mov.u64 %rd13, c_clusters;
218 add.s64 %rd14, %rd13, %rd12;
219 ld.const.f32 %f33, [%rd14];
220 sub.f32 %f34, %f29, %f33;
221 fma.rn.f32 %f67, %f34, %f34, %f67;
222 add.s32 %r52, %r52, 1;
225 setp.lt.u32%p8, %r22, 4;
226 @%p8 bra BB1_12;
228 mad.lo.s32 %r44, %r22, %r48, %r52;
229 mul.wide.s32 %rd15, %r44, 4;
230 mov.u64 %rd16, c_clusters;
231 add.s64 %rd20, %rd16, %rd15;
232 mad.lo.s32 %r53, %r23, %r52, %r4;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
261 setp.lt.f32%p10, %f67, %f62;
262 selp.b32%r55, %r48, %r55, %p10;
263 selp.f32%f62, %f67, %f62, %p10;
264 add.s32 %r48, %r48, 1;
265 setp.lt.s32%p11, %r48, %r24;
266 @%p11 bra BB1_2;
172 mul.lo.s32 %r8, %r48, %r22;
173 mov.f32 %f67, 0f00000000;
174 setp.lt.s32%p4, %r22, 1;
175 @%p4 bra BB1_12;
177 and.b32 %r36, %r22, 3;
178 mov.u32 %r52, 0;
179 mov.f32 %f67, 0f00000000;
180 setp.eq.s32%p5, %r36, 0;
181 @%p5 bra BB1_9;
183 setp.eq.s32%p6, %r36, 1;
184 @%p6 bra BB1_8;
186 setp.eq.s32%p7, %r36, 2;
187 @%p7 bra BB1_7;
199 neg.s32 %r38, %r52;
200 and.b32 %r39, %r38, %r23;
201 add.s32 %r40, %r39, %r4;
202 tex.1d.v4.f32.s32{%f23, %f24, %f25, %f26}, [t_features, {%r40}];
203 add.s32 %r41, %r52, %r8;
204 mul.wide.s32 %rd9, %r41, 4;
205 mov.u64 %rd10, c_clusters;
206 add.s64 %rd11, %rd10, %rd9;
207 ld.const.f32 %f27, [%rd11];
208 sub.f32 %f28, %f23, %f27;
209 fma.rn.f32 %f67, %f28, %f28, %f67;
210 add.s32 %r52, %r52, 1;
213 mad.lo.s32 %r42, %r52, %r23, %r4;
214 tex.1d.v4.f32.s32{%f29, %f30, %f31, %f32}, [t_features, {%r42}];
215 add.s32 %r43, %r52, %r8;
216 mul.wide.s32 %rd12, %r43, 4;
217 mov.u64 %rd13, c_clusters;
218 add.s64 %rd14, %rd13, %rd12;
219 ld.const.f32 %f33, [%rd14];
220 sub.f32 %f34, %f29, %f33;
221 fma.rn.f32 %f67, %f34, %f34, %f67;
222 add.s32 %r52, %r52, 1;
225 setp.lt.u32%p8, %r22, 4;
226 @%p8 bra BB1_12;
228 mad.lo.s32 %r44, %r22, %r48, %r52;
229 mul.wide.s32 %rd15, %r44, 4;
230 mov.u64 %rd16, c_clusters;
231 add.s64 %rd20, %rd16, %rd15;
232 mad.lo.s32 %r53, %r23, %r52, %r4;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
261 setp.lt.f32%p10, %f67, %f62;
262 selp.b32%r55, %r48, %r55, %p10;
263 selp.f32%f62, %f67, %f62, %p10;
264 add.s32 %r48, %r48, 1;
265 setp.lt.s32%p11, %r48, %r24;
266 @%p11 bra BB1_2;
172 mul.lo.s32 %r8, %r48, %r22;
173 mov.f32 %f67, 0f00000000;
174 setp.lt.s32%p4, %r22, 1;
175 @%p4 bra BB1_12;
177 and.b32 %r36, %r22, 3;
178 mov.u32 %r52, 0;
179 mov.f32 %f67, 0f00000000;
180 setp.eq.s32%p5, %r36, 0;
181 @%p5 bra BB1_9;
183 setp.eq.s32%p6, %r36, 1;
184 @%p6 bra BB1_8;
186 setp.eq.s32%p7, %r36, 2;
187 @%p7 bra BB1_7;
199 neg.s32 %r38, %r52;
200 and.b32 %r39, %r38, %r23;
201 add.s32 %r40, %r39, %r4;
202 tex.1d.v4.f32.s32{%f23, %f24, %f25, %f26}, [t_features, {%r40}];
203 add.s32 %r41, %r52, %r8;
204 mul.wide.s32 %rd9, %r41, 4;
205 mov.u64 %rd10, c_clusters;
206 add.s64 %rd11, %rd10, %rd9;
207 ld.const.f32 %f27, [%rd11];
208 sub.f32 %f28, %f23, %f27;
209 fma.rn.f32 %f67, %f28, %f28, %f67;
210 add.s32 %r52, %r52, 1;
213 mad.lo.s32 %r42, %r52, %r23, %r4;
214 tex.1d.v4.f32.s32{%f29, %f30, %f31, %f32}, [t_features, {%r42}];
215 add.s32 %r43, %r52, %r8;
216 mul.wide.s32 %rd12, %r43, 4;
217 mov.u64 %rd13, c_clusters;
218 add.s64 %rd14, %rd13, %rd12;
219 ld.const.f32 %f33, [%rd14];
220 sub.f32 %f34, %f29, %f33;
221 fma.rn.f32 %f67, %f34, %f34, %f67;
222 add.s32 %r52, %r52, 1;
225 setp.lt.u32%p8, %r22, 4;
226 @%p8 bra BB1_12;
228 mad.lo.s32 %r44, %r22, %r48, %r52;
229 mul.wide.s32 %rd15, %r44, 4;
230 mov.u64 %rd16, c_clusters;
231 add.s64 %rd20, %rd16, %rd15;
232 mad.lo.s32 %r53, %r23, %r52, %r4;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
261 setp.lt.f32%p10, %f67, %f62;
262 selp.b32%r55, %r48, %r55, %p10;
263 selp.f32%f62, %f67, %f62, %p10;
264 add.s32 %r48, %r48, 1;
265 setp.lt.s32%p11, %r48, %r24;
266 @%p11 bra BB1_2;
172 mul.lo.s32 %r8, %r48, %r22;
173 mov.f32 %f67, 0f00000000;
174 setp.lt.s32%p4, %r22, 1;
175 @%p4 bra BB1_12;
177 and.b32 %r36, %r22, 3;
178 mov.u32 %r52, 0;
179 mov.f32 %f67, 0f00000000;
180 setp.eq.s32%p5, %r36, 0;
181 @%p5 bra BB1_9;
183 setp.eq.s32%p6, %r36, 1;
184 @%p6 bra BB1_8;
186 setp.eq.s32%p7, %r36, 2;
187 @%p7 bra BB1_7;
199 neg.s32 %r38, %r52;
200 and.b32 %r39, %r38, %r23;
201 add.s32 %r40, %r39, %r4;
202 tex.1d.v4.f32.s32{%f23, %f24, %f25, %f26}, [t_features, {%r40}];
203 add.s32 %r41, %r52, %r8;
204 mul.wide.s32 %rd9, %r41, 4;
205 mov.u64 %rd10, c_clusters;
206 add.s64 %rd11, %rd10, %rd9;
207 ld.const.f32 %f27, [%rd11];
208 sub.f32 %f28, %f23, %f27;
209 fma.rn.f32 %f67, %f28, %f28, %f67;
210 add.s32 %r52, %r52, 1;
213 mad.lo.s32 %r42, %r52, %r23, %r4;
214 tex.1d.v4.f32.s32{%f29, %f30, %f31, %f32}, [t_features, {%r42}];
215 add.s32 %r43, %r52, %r8;
216 mul.wide.s32 %rd12, %r43, 4;
217 mov.u64 %rd13, c_clusters;
218 add.s64 %rd14, %rd13, %rd12;
219 ld.const.f32 %f33, [%rd14];
220 sub.f32 %f34, %f29, %f33;
221 fma.rn.f32 %f67, %f34, %f34, %f67;
222 add.s32 %r52, %r52, 1;
225 setp.lt.u32%p8, %r22, 4;
226 @%p8 bra BB1_12;
228 mad.lo.s32 %r44, %r22, %r48, %r52;
229 mul.wide.s32 %rd15, %r44, 4;
230 mov.u64 %rd16, c_clusters;
231 add.s64 %rd20, %rd16, %rd15;
232 mad.lo.s32 %r53, %r23, %r52, %r4;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
261 setp.lt.f32%p10, %f67, %f62;
262 selp.b32%r55, %r48, %r55, %p10;
263 selp.f32%f62, %f67, %f62, %p10;
264 add.s32 %r48, %r48, 1;
265 setp.lt.s32%p11, %r48, %r24;
266 @%p11 bra BB1_2;
172 mul.lo.s32 %r8, %r48, %r22;
173 mov.f32 %f67, 0f00000000;
174 setp.lt.s32%p4, %r22, 1;
175 @%p4 bra BB1_12;
177 and.b32 %r36, %r22, 3;
178 mov.u32 %r52, 0;
179 mov.f32 %f67, 0f00000000;
180 setp.eq.s32%p5, %r36, 0;
181 @%p5 bra BB1_9;
183 setp.eq.s32%p6, %r36, 1;
184 @%p6 bra BB1_8;
186 setp.eq.s32%p7, %r36, 2;
187 @%p7 bra BB1_7;
199 neg.s32 %r38, %r52;
200 and.b32 %r39, %r38, %r23;
201 add.s32 %r40, %r39, %r4;
202 tex.1d.v4.f32.s32{%f23, %f24, %f25, %f26}, [t_features, {%r40}];
203 add.s32 %r41, %r52, %r8;
204 mul.wide.s32 %rd9, %r41, 4;
205 mov.u64 %rd10, c_clusters;
206 add.s64 %rd11, %rd10, %rd9;
207 ld.const.f32 %f27, [%rd11];
208 sub.f32 %f28, %f23, %f27;
209 fma.rn.f32 %f67, %f28, %f28, %f67;
210 add.s32 %r52, %r52, 1;
213 mad.lo.s32 %r42, %r52, %r23, %r4;
214 tex.1d.v4.f32.s32{%f29, %f30, %f31, %f32}, [t_features, {%r42}];
215 add.s32 %r43, %r52, %r8;
216 mul.wide.s32 %rd12, %r43, 4;
217 mov.u64 %rd13, c_clusters;
218 add.s64 %rd14, %rd13, %rd12;
219 ld.const.f32 %f33, [%rd14];
220 sub.f32 %f34, %f29, %f33;
221 fma.rn.f32 %f67, %f34, %f34, %f67;
222 add.s32 %r52, %r52, 1;
225 setp.lt.u32%p8, %r22, 4;
226 @%p8 bra BB1_12;
228 mad.lo.s32 %r44, %r22, %r48, %r52;
229 mul.wide.s32 %rd15, %r44, 4;
230 mov.u64 %rd16, c_clusters;
231 add.s64 %rd20, %rd16, %rd15;
232 mad.lo.s32 %r53, %r23, %r52, %r4;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
235 tex.1d.v4.f32.s32{%f35, %f36, %f37, %f38}, [t_features, {%r53}];
236 ld.const.f32 %f39, [%rd20];
237 sub.f32 %f40, %f35, %f39;
238 fma.rn.f32 %f41, %f40, %f40, %f67;
239 add.s32 %r45, %r23, %r53;
240 tex.1d.v4.f32.s32{%f42, %f43, %f44, %f45}, [t_features, {%r45}];
241 ld.const.f32 %f46, [%rd20+4];
242 sub.f32 %f47, %f42, %f46;
243 fma.rn.f32 %f48, %f47, %f47, %f41;
244 add.s32 %r46, %r23, %r45;
245 tex.1d.v4.f32.s32{%f49, %f50, %f51, %f52}, [t_features, {%r46}];
246 ld.const.f32 %f53, [%rd20+8];
247 sub.f32 %f54, %f49, %f53;
248 fma.rn.f32 %f55, %f54, %f54, %f48;
249 add.s32 %r47, %r23, %r46;
250 add.s32 %r53, %r23, %r47;
251 tex.1d.v4.f32.s32{%f56, %f57, %f58, %f59}, [t_features, {%r47}];
252 ld.const.f32 %f60, [%rd20+12];
253 sub.f32 %f61, %f56, %f60;
254 fma.rn.f32 %f67, %f61, %f61, %f55;
255 add.s64 %rd20, %rd20, 16;
256 add.s32 %r52, %r52, 4;
257 setp.lt.s32%p9, %r52, %r22;
258 @%p9 bra BB1_11;
261 setp.lt.f32%p10, %f67, %f62;
262 selp.b32%r55, %r48, %r55, %p10;
263 selp.f32%f62, %f67, %f62, %p10;
264 add.s32 %r48, %r48, 1;
265 setp.lt.s32%p11, %r48, %r24;
266 @%p11 bra BB1_2;
269 setp.ge.u32%p12, %r4, %r23;
270 @%p12 bra BB1_15;
272 cvta.to.global.u64 %rd17, %rd5;
273 mul.wide.u32 %rd18, %r4, 4;
274 add.s64 %rd19, %rd17, %rd18;
275 st.global.u32 [%rd19], %r55;
278 ret;
