../src/lowrisc_ibex_ibex_core_0.1/rtl/ibex_alu.sv Cov: 42.1%
1: // Copyright lowRISC contributors.
2: // Copyright 2018 ETH Zurich and University of Bologna, see also CREDITS.md.
3: // Licensed under the Apache License, Version 2.0, see LICENSE for details.
4: // SPDX-License-Identifier: Apache-2.0
5:
6: /**
7: * Arithmetic logic unit
8: */
9: module ibex_alu #(
10: parameter bit RV32B = 1'b0
11: ) (
12: input ibex_pkg::alu_op_e operator_i,
13: input logic [31:0] operand_a_i,
14: input logic [31:0] operand_b_i,
15:
16: input logic instr_first_cycle_i,
17:
18: input logic [32:0] multdiv_operand_a_i,
19: input logic [32:0] multdiv_operand_b_i,
20:
21: input logic multdiv_sel_i,
22:
23: input logic [31:0] imd_val_q_i,
24: output logic [31:0] imd_val_d_o,
25: output logic imd_val_we_o,
26:
27: output logic [31:0] adder_result_o,
28: output logic [33:0] adder_result_ext_o,
29:
30: output logic [31:0] result_o,
31: output logic comparison_result_o,
32: output logic is_equal_result_o
33: );
34: import ibex_pkg::*;
35:
36: logic [31:0] operand_a_rev;
37: logic [32:0] operand_b_neg;
38:
39: // bit reverse operand_a for left shifts and bit counting
40: for (genvar k = 0; k < 32; k++) begin : gen_rev_operand_a
41: assign operand_a_rev[k] = operand_a_i[31-k];
42: end
43:
44: ///////////
45: // Adder //
46: ///////////
47:
48: logic adder_op_b_negate;
49: logic [32:0] adder_in_a, adder_in_b;
50: logic [31:0] adder_result;
51:
52: always_comb begin
53: adder_op_b_negate = 1'b0; // default: plain addition, operand B used as-is
54: unique case (operator_i)
55: // Adder OPs
56: ALU_SUB,
57:
58: // Comparator OPs
59: ALU_EQ, ALU_NE,
60: ALU_GE, ALU_GEU,
61: ALU_LT, ALU_LTU,
62: ALU_SLT, ALU_SLTU,
63:
64: // MinMax OPs (RV32B Ops)
65: ALU_MIN, ALU_MINU,
66: ALU_MAX, ALU_MAXU: adder_op_b_negate = 1'b1; // all of these compute a - b on the shared adder
67:
68: default:;
69: endcase
70: end
71:
72: // prepare operand a
73: assign adder_in_a = multdiv_sel_i ? multdiv_operand_a_i : {operand_a_i,1'b1}; // LSB=1 acts as carry-in when B is one's-complemented below
74:
75: // prepare operand b
76: assign operand_b_neg = {operand_b_i,1'b0} ^ {33{1'b1}}; // one's complement; together with A's LSB carry-in this yields two's-complement -B
77: always_comb begin
78: unique case(1'b1)
79: multdiv_sel_i: adder_in_b = multdiv_operand_b_i;
80: adder_op_b_negate: adder_in_b = operand_b_neg;
81: default : adder_in_b = {operand_b_i, 1'b0};
82: endcase
83: end
84:
85: // actual adder
86: assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
87:
88: assign adder_result = adder_result_ext_o[32:1]; // strip carry-in bit [0] and carry-out bit [33]
89:
90: assign adder_result_o = adder_result;
91:
92: ////////////////
93: // Comparison //
94: ////////////////
95:
96: logic is_equal;
97: logic is_greater_equal; // handles both signed and unsigned forms
98: logic cmp_signed;
99:
100: always_comb begin
101: unique case (operator_i)
102: ALU_GE,
103: ALU_LT,
104: ALU_SLT,
105: // RV32B only
106: ALU_MIN,
107: ALU_MAX: cmp_signed = 1'b1;
108:
109: default: cmp_signed = 1'b0;
110: endcase
111: end
112:
113: assign is_equal = (adder_result == 32'b0); // a - b == 0 <=> a == b
114: assign is_equal_result_o = is_equal;
115:
116: // Is greater equal
117: always_comb begin
118: if ((operand_a_i[31] ^ operand_b_i[31]) == 1'b0) begin
119: is_greater_equal = (adder_result[31] == 1'b0); // same sign: a - b cannot overflow, its sign bit decides
120: end else begin
121: is_greater_equal = operand_a_i[31] ^ (cmp_signed); // differing signs: result determined by a's sign and signedness (see truth tables below)
122: end
123: end
124:
125: // GTE unsigned:
126: // (a[31] == 1 && b[31] == 1) => adder_result[31] == 0
127: // (a[31] == 0 && b[31] == 0) => adder_result[31] == 0
128: // (a[31] == 1 && b[31] == 0) => 1
129: // (a[31] == 0 && b[31] == 1) => 0
130:
131: // GTE signed:
132: // (a[31] == 1 && b[31] == 1) => adder_result[31] == 0
133: // (a[31] == 0 && b[31] == 0) => adder_result[31] == 0
134: // (a[31] == 1 && b[31] == 0) => 0
135: // (a[31] == 0 && b[31] == 1) => 1
136:
137: // generate comparison result
138: logic cmp_result;
139:
140: always_comb begin
141: unique case (operator_i)
142: ALU_EQ: cmp_result = is_equal;
143: ALU_NE: cmp_result = ~is_equal;
144: ALU_GE, ALU_GEU,
145: ALU_MAX, ALU_MAXU: cmp_result = is_greater_equal; // RV32B only
146: ALU_LT, ALU_LTU,
147: ALU_MIN, ALU_MINU, //RV32B only
148: ALU_SLT, ALU_SLTU: cmp_result = ~is_greater_equal;
149:
150: default: cmp_result = is_equal;
151: endcase
152: end
153:
154: assign comparison_result_o = cmp_result;
155:
156: ///////////
157: // Shift //
158: ///////////
159:
160: // The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for
161: // arithmetic shifts and one-shift support.
162: // Rotations and funnel shifts are implemented as multi-cycle instructions.
163: // The shifter is also used for single-bit instructions and bit-field place as detailed below.
164: //
165: // Standard Shifts
166: // ===============
167: // For standard shift instructions, the direction of the shift is to the right by default. For
168: // left shifts, the signal shift_left signal is set. If so, the operand is initially reversed,
169: // shifted to the right by the specified amount and shifted back again. For arithmetic- and
170: // one-shifts the 33rd bit of the shifter operand is set accordingly.
171: //
172: // Multicycle Shifts
173: // =================
174: //
175: // Rotation
176: // --------
177: // For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and
178: // rs2 respectively.
179: //
180: // Rotation pseudocode:
181: // shift_amt = rs2 & 31;
182: // multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt));
183: // ^-- cycle 0 -----^ ^-- cycle 1 --------------^
184: //
185: // Funnel Shifts
186: // -------------
187: // For funnel shifts, operand_a_i is tied to rs1 in the first cycle and rs3 in the
188: // second cycle. operand_b_i is always tied to rs2. The order of applying the shift amount or
189: // its complement is determined by bit [5] of shift_amt.
190: //
191: // Funnel shift Pseudocode: (fsl)
192: // shift_amt = rs2 & 63;
193: // shift_amt_compl = 32 - shift_amt[4:0]
194: // if (shift_amt >=33):
195: // multicycle_result = (rs1 >> shift_amt_compl[4:0]) | (rs3 << shift_amt[4:0]);
196: // ^-- cycle 0 ---------------^ ^-- cycle 1 ------------^
197: // else if (shift_amt <= 31 && shift_amt > 0):
198: // multicycle_result = (rs1 << shift_amt[4:0]) | (rs3 >> shift_amt_compl[4:0]);
199: // ^-- cycle 0 ----------^ ^-- cycle 1 -------------------^
200: // For shift_amt == 0, 32, both shift_amt[4:0] and shift_amt_compl[4:0] == '0.
201: // these cases need to be handled separately outside the shifting structure:
202: // else if (shift_amt == 32):
203: // multicycle_result = rs3
204: // else if (shift_amt == 0):
205: // multicycle_result = rs1.
206: //
207: // Single-Bit Instructions
208: // =======================
209: // Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i.
210:
211: // The operations sbset, sbclr and sbinv are implemented by generation of a bit-mask using the
212: // shifter structure. This is done by left-shifting the operand 32'h1 by the required amount.
213: // The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left.
214: // Further processing is taken care of by a separate structure.
215: //
216: // For sbext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply
217: // shifting operand_a_i to the right by the required amount and returning bit [0] of the result.
218: //
219: // Bit-Field Place
220: // ===============
221: // The shifter structure is shared to compute bfp_mask << bfp_off.
222:
223: logic shift_left;
224: logic shift_ones;
225: logic shift_arith;
226: logic shift_funnel;
227: logic shift_sbmode;
228: logic [5:0] shift_amt;
229: logic [5:0] shift_amt_compl; // complementary shift amount (32 - shift_amt)
230:
231: logic [31:0] shift_result;
232: logic [32:0] shift_result_ext;
233: logic [31:0] shift_result_rev;
234:
235: // zbf
236: logic bfp_op;
237: logic [4:0] bfp_len;
238: logic [4:0] bfp_off;
239: logic [31:0] bfp_mask;
240: logic [31:0] bfp_mask_rev;
241: logic [31:0] bfp_result;
242:
243: // bfp: shares the shifter structure to compute bfp_mask << bfp_off
244: assign bfp_op = RV32B ? (operator_i == ALU_BFP) : 1'b0;
245: assign bfp_len = {~(|operand_b_i[27:24]), operand_b_i[27:24]}; // len = 0 encodes for len = 16
246: assign bfp_off = operand_b_i[20:16];
247: assign bfp_mask = RV32B ? ~(32'hffff_ffff << bfp_len) : '0;
248: for (genvar i=0; i<32; i++) begin : gen_rev_bfp_mask
249: assign bfp_mask_rev[i] = bfp_mask[31-i];
250: end
251:
252: assign bfp_result =
253: RV32B ? (~shift_result & operand_a_i) | ((operand_b_i & bfp_mask) << bfp_off) : '0;
254:
255: // bit shift_amt[5]: word swap bit: only considered for FSL/FSR.
256: // if set, reverse operations in first and second cycle.
257: assign shift_amt[5] = operand_b_i[5] & shift_funnel;
258: assign shift_amt_compl = 32 - operand_b_i[4:0];
259:
260: always_comb begin
261: if (bfp_op) begin
262: shift_amt[4:0] = bfp_off ; // offset field of the bfp control word (shifter computes bfp_mask << bfp_off)
263: end else begin
264: shift_amt[4:0] = instr_first_cycle_i ?
265: (operand_b_i[5] && shift_funnel ? shift_amt_compl[4:0] : operand_b_i[4:0]) : // cycle 0: complement amount if funnel word-swap bit set
266: (operand_b_i[5] && shift_funnel ? operand_b_i[4:0] : shift_amt_compl[4:0]); // cycle 1: the other amount of the pair
267: end
268: end
269:
270:
271: // single-bit mode: shift
272: assign shift_sbmode = RV32B ?
273: (operator_i == ALU_SBSET) | (operator_i == ALU_SBCLR) | (operator_i == ALU_SBINV) : 1'b0;
274:
275: // left shift if this is:
276: // * a standard left shift (slo, sll)
277: // * a rol in the first cycle
278: // * a ror in the second cycle
279: // * fsl: without word-swap bit: first cycle, else: second cycle
280: // * fsr: without word-swap bit: second cycle, else: first cycle
281: // * a single-bit instruction: sbclr, sbset, sbinv (excluding sbext)
282: // * bfp: bfp_mask << bfp_off
283: always_comb begin
284: unique case (operator_i)
285: ALU_SLL: shift_left = 1'b1;
286: ALU_SLO,
287: ALU_BFP: shift_left = RV32B ? 1'b1 : 1'b0;
288: ALU_ROL: shift_left = RV32B ? instr_first_cycle_i : 0;
289: ALU_ROR: shift_left = RV32B ? ~instr_first_cycle_i : 0;
290: ALU_FSL: shift_left =
291: RV32B ? (shift_amt[5] ? ~instr_first_cycle_i : instr_first_cycle_i) : 1'b0;
292: ALU_FSR: shift_left =
293: RV32B ? (shift_amt[5] ? instr_first_cycle_i : ~instr_first_cycle_i) : 1'b0;
294: default: shift_left = 1'b0;
295: endcase
296: if (shift_sbmode) begin
297: shift_left = 1'b1;
298: end
299: end
300:
301: assign shift_arith = (operator_i == ALU_SRA);
302: assign shift_ones = RV32B ? (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0;
303: assign shift_funnel = RV32B ? (operator_i == ALU_FSL) | (operator_i == ALU_FSR) : 1'b0;
304:
305: // shifter structure.
306: always_comb begin
307:
308: // select shifter input
309: // for bfp, sbmode and shift_left the corresponding bit-reversed input is chosen.
310: if (shift_sbmode) begin
311: shift_result = 32'h8000_0000; // rev(32'h1)
312: end else begin
313: unique case (1'b1)
314: bfp_op: shift_result = bfp_mask_rev;
315: shift_left: shift_result = operand_a_rev;
316: default: shift_result = operand_a_i;
317: endcase
318: end
319:
320:
321: shift_result_ext =
322: $signed({shift_ones | (shift_arith & shift_result[31]), shift_result}) >>> shift_amt[4:0]; // 33-bit arithmetic right shift; bit 32 = 1 for ones-insert shifts or SRA with negative operand
323:
324: shift_result = shift_result_ext[31:0]; // drop the extension bit
325:
326: for (int unsigned i=0; i<32; i++) begin
327: shift_result_rev[i] = shift_result[31-i]; // computed unconditionally, consumed only for left shifts
328: end
329:
330: shift_result = shift_left ? shift_result_rev : shift_result; // undo the initial bit reversal for left shifts
331:
332: end
333:
334: ///////////////////
335: // Bitwise Logic //
336: ///////////////////
337:
338: logic bwlogic_or;
339: logic bwlogic_and;
340: logic [31:0] bwlogic_operand_b;
341: logic [31:0] bwlogic_or_result;
342: logic [31:0] bwlogic_and_result;
343: logic [31:0] bwlogic_xor_result;
344: logic [31:0] bwlogic_result;
345:
346: logic bwlogic_op_b_negate;
347:
348: always_comb begin
349: unique case (operator_i)
350: // Logic-with-negate OPs (RV32B Ops)
351: ALU_XNOR,
352: ALU_ORN,
353: ALU_ANDN: bwlogic_op_b_negate = RV32B ? 1'b1 : 1'b0;
354: ALU_CMIX: bwlogic_op_b_negate = RV32B ? ~instr_first_cycle_i : 1'b0; // NOTE(review): cmix appears to use the inverted rs2 mask in its second cycle — confirm against the multicycle controller
355: default: bwlogic_op_b_negate = 1'b0;
356: endcase
357: end
358:
359: assign bwlogic_operand_b = bwlogic_op_b_negate ? operand_b_neg[32:1] : operand_b_i;
360:
361: assign bwlogic_or_result = operand_a_i | bwlogic_operand_b;
362: assign bwlogic_and_result = operand_a_i & bwlogic_operand_b;
363: assign bwlogic_xor_result = operand_a_i ^ bwlogic_operand_b;
364:
365: assign bwlogic_or = (operator_i == ALU_OR) | (operator_i == ALU_ORN);
366: assign bwlogic_and = (operator_i == ALU_AND) | (operator_i == ALU_ANDN);
367:
368: always_comb begin
369: // Priority select of the bitwise result: OR, then AND, XOR as fallback.
370: if (bwlogic_or) bwlogic_result = bwlogic_or_result;
371: else if (bwlogic_and) bwlogic_result = bwlogic_and_result;
372: else bwlogic_result = bwlogic_xor_result;
373: // Equivalent to the original one-hot unique-case mux over the same selects.
374: end
375:
376: logic [31:0] shuffle_result;
377: logic [31:0] butterfly_result;
378: logic [31:0] invbutterfly_result;
379:
380: logic [31:0] minmax_result;
381: logic [5:0] bitcnt_result;
382: logic [31:0] pack_result;
383: logic [31:0] sext_result;
384: logic [31:0] multicycle_result;
385: logic [31:0] singlebit_result;
386: logic [31:0] clmul_result;
387:
388: if (RV32B) begin : g_alu_rvb
389:
390: /////////////////
391: // Bitcounting //
392: /////////////////
393:
394: // The bit-counter structure computes the number of set bits in its operand. Partial results
395: // (from left to right) are needed to compute the control masks for computation of bext/bdep
396: // by the butterfly network, if implemented.
397: // For pcnt, clz and ctz, only the end result is used.
398:
399: logic zbe_op;
400: logic bitcnt_ctz;
401: logic bitcnt_clz;
402: logic bitcnt_cz;
403: logic [31:0] bitcnt_bits;
404: logic [31:0] bitcnt_mask_op;
405: logic [31:0] bitcnt_bit_mask;
406: logic [ 5:0] bitcnt_partial [32];
407:
408:
409: assign bitcnt_ctz = operator_i == ALU_CTZ;
410: assign bitcnt_clz = operator_i == ALU_CLZ;
411: assign bitcnt_cz = bitcnt_ctz | bitcnt_clz;
412: assign bitcnt_result = bitcnt_partial[31];
413:
414: // Bit-mask generation for clz and ctz:
415: // The bit mask is generated by spreading the lowest-order set bit in the operand to all
416: // higher order bits. The resulting mask is inverted to cover the lowest order zeros. In order
417: // to create the bit mask for leading zeros, the input operand needs to be reversed.
418: assign bitcnt_mask_op = bitcnt_clz ? operand_a_rev : operand_a_i;
419:
420: always_comb begin
421: bitcnt_bit_mask = bitcnt_mask_op;
422: bitcnt_bit_mask |= bitcnt_bit_mask << 1; // log-time smear: spread every set bit to all higher positions
423: bitcnt_bit_mask |= bitcnt_bit_mask << 2;
424: bitcnt_bit_mask |= bitcnt_bit_mask << 4;
425: bitcnt_bit_mask |= bitcnt_bit_mask << 8;
426: bitcnt_bit_mask |= bitcnt_bit_mask << 16;
427: bitcnt_bit_mask = ~bitcnt_bit_mask; // invert: ones now cover exactly the zeros below the lowest set bit
428: end
429:
430: always_comb begin
431: case(1'b1)
432: zbe_op: bitcnt_bits = operand_b_i; // bext / bdep: partial popcounts of the mask operand drive the butterfly controls
433: bitcnt_cz: bitcnt_bits = bitcnt_bit_mask & ~bitcnt_mask_op; // clz / ctz
434: default: bitcnt_bits = operand_a_i; // pcnt
435: endcase
436: end
437:
438: // The parallel prefix counter is of the structure of a Brent-Kung Adder. In the first
439: // log2(width) stages, the sum of the n preceding bit lines is computed for the bit lines at
440: // positions 2**n-1 (power-of-two positions) where n denotes the current stage.
441: // In stage n=log2(width), the count for position width-1 (the MSB) is finished.
442: // For the intermediate values, an inverse adder tree then computes the bit counts for the bit
443: // lines at positions
444: // m = 2**(n-1) + i*2**(n-2), where i = [1 ... width / 2**(n-1)-1] and n = [log2(width) ... 2].
445: // Thus, at every subsequent stage the result of two previously unconnected sub-trees is
446: // summed, starting at the node summing bits [width/2-1 : 0] and [3*width/4-1: width/2]
447: // and moving to iteratively sum up all the sub-trees.
448: // The inverse adder tree thus features log2(width) - 1 stages the first of these stages is a
449: // single addition at position 3*width/4 - 1. It does not interfere with the last
450: // stage of the primary adder tree. These stages can thus be folded together, resulting in a
451: // total of 2*log2(width)-2 stages.
452: // For more details refer to R. Brent, H. T. Kung, "A Regular Layout for Parallel Adders",
453: // (1982).
454: // For a bitline at position p, only bits
455: // bitcnt_partial[max(i, such that p % log2(i) == 0)-1 : 0] are needed for generation of the
456: // butterfly network control signals. The adders in the intermediate value adder tree thus need
457: // not be full 5-bit adders. We leave the optimization to the synthesis tools.
458: //
459: // Consider the following 8-bit example for illustration.
460: //
461: // let bitcnt_bits = 8'babcdefgh.
462: //
463: // a b c d e f g h
464: // | /: | /: | /: | /:
465: // |/ : |/ : |/ : |/ :
466: // stage 1: + : + : + : + :
467: // | : /: : | : /: :
468: // |,--+ : : |,--+ : :
469: // stage 2: + : : : + : : :
470: // | : | : /: : : :
471: // |,-----,--+ : : : : ^-primary adder tree
472: // stage 3: + : + : : : : : -------------------------
473: // : | /| /| /| /| /| : ,-intermediate adder tree
474: // : |/ |/ |/ |/ |/ : :
475: // stage 4 : + + + + + : :
476: // : : : : : : : :
477: // bitcnt_partial[i] 7 6 5 4 3 2 1 0
478:
479: always_comb begin
480: bitcnt_partial = '{default: '0};
481: // stage 1
482: for (int unsigned i=1; i<32; i+=2) begin
483: bitcnt_partial[i] = {5'h0, bitcnt_bits[i]} + {5'h0, bitcnt_bits[i-1]};
484: end
485: // stage 2
486: for (int unsigned i=3; i<32; i+=4) begin
487: bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
488: end
489: // stage 3
490: for (int unsigned i=7; i<32; i+=8) begin
491: bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
492: end
493: // stage 4
494: for (int unsigned i=15; i <32; i+=16) begin
495: bitcnt_partial[i] = bitcnt_partial[i-8] + bitcnt_partial[i];
496: end
497: // stage 5
498: bitcnt_partial[31] = bitcnt_partial[15] + bitcnt_partial[31];
499: // ^- primary adder tree
500: // -------------------------------
501: // ,-intermediate value adder tree
502: bitcnt_partial[23] = bitcnt_partial[15] + bitcnt_partial[23];
503:
504: // stage 6
505: for (int unsigned i=11; i<32; i+=8) begin
506: bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
507: end
508:
509: // stage 7
510: for (int unsigned i=5; i<32; i+=4) begin
511: bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
512: end
513: // stage 8
514: bitcnt_partial[0] = {5'h0, bitcnt_bits[0]};
515: for (int unsigned i=2; i<32; i+=2) begin
516: bitcnt_partial[i] = bitcnt_partial[i-1] + {5'h0, bitcnt_bits[i]};
517: end
518: end
519:
520: ///////////////
521: // Butterfly //
522: ///////////////
523:
524: // The butterfly / inverse butterfly network is shared between bext/bdep (zbe) instructions
525: // respectively and grev / gorc instructions (zbp).
526: // For bdep, the control bits mask of a local left region is generated by
527: // the inverse of a n-bit left rotate and complement upon wrap (LROTC) operation by the number
528: // of ones in the deposit bitmask to the right of the segment. n hereby denotes the width
529: // of the according segment. The bitmask for a pertaining local right region is equal to the
530: // corresponding local left region. Bext uses an analogue inverse process.
531: // Consider the following 8-bit example. For details, see Hilewitz et al. "Fast Bit Gather,
532: // Bit Scatter and Bit Permutation Instructions for Commodity Microprocessors", (2008).
533:
534: // 8-bit example: (Hilewitz et al.)
535: // Consider the instruction bdep operand_a_i deposit_mask
536: // Let operand_a_i = 8'babcd_efgh
537: // deposit_mask = 8'b1010_1101
538: //
539: // control bitmask for stage 1:
540: // - number of ones in the right half of the deposit bitmask: 3
541: // - width of the segment: 4
542: // - control bitmask = ~LROTC(4'b0, 3)[3:0] = 4'b1000
543: //
544: // control bitmask: c3 c2 c1 c0 c3 c2 c1 c0
545: // 1 0 0 0 1 0 0 0
546: // <- L -----> <- R ----->
547: // operand_a_i a b c d e f g h
548: // :\ | | | /: | | |
549: // : +|---|--|-+ : | | |
550: // :/ | | | \: | | |
551: // stage 1 e b c d a f g h
552: //
553: // control bitmask: c3 c2 c3 c2 c1 c0 c1 c0
554: // 1 1 1 1 1 0 1 0
555: // :\ :\ /: /: :\ | /: |
556: // : +:-+-:+ : : +|-+ : |
557: // :/ :/ \: \: :/ | \: |
558: // stage 2 c d e b g f a h
559: // L R L R L R L R
560: // control bitmask: c3 c3 c2 c2 c1 c1 c0 c0
561: // 1 1 0 0 1 1 0 0
562: // :\/: | | :\/: | |
563: // : : | | : : | |
564: // :/\: | | :/\: | |
565: // stage 3 d c e b f g a h
566: // & deposit bitmask: 1 0 1 0 1 1 0 1
567: // result: d 0 e 0 f g 0 h
568:
569: assign zbe_op = (operator_i == ALU_BEXT) | (operator_i == ALU_BDEP);
570:
571: logic [31:0] butterfly_mask_l[5];
572: logic [31:0] butterfly_mask_r[5];
573: logic [31:0] butterfly_mask_not[5];
574: logic [31:0] lrotc_stage [5]; // left rotate and complement upon wrap
575:
576: // bext / bdep
577: logic [31:0] butterfly_zbe_mask_l[5];
578: logic [31:0] butterfly_zbe_mask_r[5];
579: logic [31:0] butterfly_zbe_mask_not[5];
580:
581: // grev / gorc
582: logic [31:0] butterfly_zbp_mask_l[5];
583: logic [31:0] butterfly_zbp_mask_r[5];
584: logic [31:0] butterfly_zbp_mask_not[5];
585:
586: logic grev_op;
587: logic gorc_op;
588: logic zbp_op;
589:
590: // number of bits in local r = 32 / 2**(stage + 1) = 16/2**stage
591: `define _N(stg) (16 >> stg)
592:
593: // bext / bdep control bit generation
594: for (genvar stg=0; stg<5; stg++) begin
595: // number of segs: 2** stg
596: for (genvar seg=0; seg<2**stg; seg++) begin
597:
598: assign lrotc_stage[stg][2*`_N(stg)*(seg+1)-1 : 2*`_N(stg)*seg] =
599: {{`_N(stg){1'b0}},{`_N(stg){1'b1}}} <<
600: bitcnt_partial[`_N(stg)*(2*seg+1)-1][$clog2(`_N(stg)):0];
601:
602: assign butterfly_zbe_mask_l[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)]
603: = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
604:
605: assign butterfly_zbe_mask_r[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)]
606: = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
607:
608: assign butterfly_zbe_mask_l[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)] = '0;
609: assign butterfly_zbe_mask_r[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)] = '0;
610: end
611: end
612: `undef _N
613:
614: for (genvar stg=0; stg<5; stg++) begin
615: assign butterfly_zbe_mask_not[stg] =
616: ~(butterfly_zbe_mask_l[stg] | butterfly_zbe_mask_r[stg]);
617: end
618:
619: // grev / gorc control bit generation
620: assign butterfly_zbp_mask_l[0] = shift_amt[4] ? 32'hffff_0000 : 32'h0000_0000;
621: assign butterfly_zbp_mask_r[0] = shift_amt[4] ? 32'h0000_ffff : 32'h0000_0000;
622: assign butterfly_zbp_mask_not[0] =
623: !shift_amt[4] || (shift_amt[4] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
624:
625: assign butterfly_zbp_mask_l[1] = shift_amt[3] ? 32'hff00_ff00 : 32'h0000_0000;
626: assign butterfly_zbp_mask_r[1] = shift_amt[3] ? 32'h00ff_00ff : 32'h0000_0000;
627: assign butterfly_zbp_mask_not[1] =
628: !shift_amt[3] || (shift_amt[3] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
629:
630: assign butterfly_zbp_mask_l[2] = shift_amt[2] ? 32'hf0f0_f0f0 : 32'h0000_0000;
631: assign butterfly_zbp_mask_r[2] = shift_amt[2] ? 32'h0f0f_0f0f : 32'h0000_0000;
632: assign butterfly_zbp_mask_not[2] =
633: !shift_amt[2] || (shift_amt[2] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
634:
635: assign butterfly_zbp_mask_l[3] = shift_amt[1] ? 32'hcccc_cccc : 32'h0000_0000;
636: assign butterfly_zbp_mask_r[3] = shift_amt[1] ? 32'h3333_3333 : 32'h0000_0000;
637: assign butterfly_zbp_mask_not[3] =
638: !shift_amt[1] || (shift_amt[1] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
639:
640: assign butterfly_zbp_mask_l[4] = shift_amt[0] ? 32'haaaa_aaaa : 32'h0000_0000;
641: assign butterfly_zbp_mask_r[4] = shift_amt[0] ? 32'h5555_5555 : 32'h0000_0000;
642: assign butterfly_zbp_mask_not[4] =
643: !shift_amt[0] || (shift_amt[0] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
644:
645: // grev / gorc instructions
646: assign grev_op = RV32B ? (operator_i == ALU_GREV) : 1'b0;
647: assign gorc_op = RV32B ? (operator_i == ALU_GORC) : 1'b0;
648: assign zbp_op = grev_op | gorc_op;
649:
650: // select set of masks:
651: assign butterfly_mask_l = zbp_op ? butterfly_zbp_mask_l : butterfly_zbe_mask_l;
652: assign butterfly_mask_r = zbp_op ? butterfly_zbp_mask_r : butterfly_zbe_mask_r;
653: assign butterfly_mask_not = zbp_op ? butterfly_zbp_mask_not : butterfly_zbe_mask_not;
654:
655: always_comb begin
656: butterfly_result = operand_a_i;
657:
658: butterfly_result = butterfly_result & butterfly_mask_not[0] |
659: ((butterfly_result & butterfly_mask_l[0]) >> 16)|
660: ((butterfly_result & butterfly_mask_r[0]) << 16);
661:
662: butterfly_result = butterfly_result & butterfly_mask_not[1] |
663: ((butterfly_result & butterfly_mask_l[1]) >> 8)|
664: ((butterfly_result & butterfly_mask_r[1]) << 8);
665:
666: butterfly_result = butterfly_result & butterfly_mask_not[2] |
667: ((butterfly_result & butterfly_mask_l[2]) >> 4)|
668: ((butterfly_result & butterfly_mask_r[2]) << 4);
669:
670: butterfly_result = butterfly_result & butterfly_mask_not[3] |
671: ((butterfly_result & butterfly_mask_l[3]) >> 2)|
672: ((butterfly_result & butterfly_mask_r[3]) << 2);
673:
674: butterfly_result = butterfly_result & butterfly_mask_not[4] |
675: ((butterfly_result & butterfly_mask_l[4]) >> 1)|
676: ((butterfly_result & butterfly_mask_r[4]) << 1);
677:
678: if (!zbp_op) begin
679: butterfly_result = butterfly_result & operand_b_i;
680: end
681: end
682:
683: always_comb begin
684: invbutterfly_result = operand_a_i & operand_b_i;
685:
686: invbutterfly_result = invbutterfly_result & butterfly_mask_not[4] |
687: ((invbutterfly_result & butterfly_mask_l[4]) >> 1)|
688: ((invbutterfly_result & butterfly_mask_r[4]) << 1);
689:
690: invbutterfly_result = invbutterfly_result & butterfly_mask_not[3] |
691: ((invbutterfly_result & butterfly_mask_l[3]) >> 2)|
692: ((invbutterfly_result & butterfly_mask_r[3]) << 2);
693:
694: invbutterfly_result = invbutterfly_result & butterfly_mask_not[2] |
695: ((invbutterfly_result & butterfly_mask_l[2]) >> 4)|
696: ((invbutterfly_result & butterfly_mask_r[2]) << 4);
697:
698: invbutterfly_result = invbutterfly_result & butterfly_mask_not[1] |
699: ((invbutterfly_result & butterfly_mask_l[1]) >> 8)|
700: ((invbutterfly_result & butterfly_mask_r[1]) << 8);
701:
702: invbutterfly_result = invbutterfly_result & butterfly_mask_not[0] |
703: ((invbutterfly_result & butterfly_mask_l[0]) >> 16)|
704: ((invbutterfly_result & butterfly_mask_r[0]) << 16);
705: end
706:
707: /////////////////////////
708: // Shuffle / Unshuffle //
709: /////////////////////////
710:
711: localparam logic [31:0] SHUFFLE_MASK_L [0:3] =
712: '{32'h00ff_0000, 32'h0f00_0f00, 32'h3030_3030, 32'h4444_4444};
713: localparam logic [31:0] SHUFFLE_MASK_R [0:3] =
714: '{32'h0000_ff00, 32'h00f0_00f0, 32'h0c0c_0c0c, 32'h2222_2222};
715:
716: localparam logic [31:0] FLIP_MASK_L [0:3] =
717: '{32'h2200_1100, 32'h0044_0000, 32'h4411_0000, 32'h1100_0000};
718: localparam logic [31:0] FLIP_MASK_R [0:3] =
719: '{32'h0088_0044, 32'h0000_2200, 32'h0000_8822, 32'h0000_0088};
720:
721: logic [31:0] SHUFFLE_MASK_NOT [0:3];
722: for(genvar i = 0; i < 4; i++) begin : gen_shuffle_mask_not
723: assign SHUFFLE_MASK_NOT[i] = ~(SHUFFLE_MASK_L[i] | SHUFFLE_MASK_R[i]);
724: end
725:
726: logic shuffle_flip;
727: assign shuffle_flip = operator_i == ALU_UNSHFL;
728:
729: logic [3:0] shuffle_mode;
730:
731: always_comb begin
732: shuffle_result = operand_a_i;
733:
734: if (shuffle_flip) begin
735: shuffle_mode[3] = shift_amt[0];
736: shuffle_mode[2] = shift_amt[1];
737: shuffle_mode[1] = shift_amt[2];
738: shuffle_mode[0] = shift_amt[3];
739: end else begin
740: shuffle_mode = shift_amt[3:0];
741: end
742:
743: if (shuffle_flip) begin
744: shuffle_result = (shuffle_result & 32'h8822_4411) |
745: ((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
746: ((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
747: ((shuffle_result << 15) & FLIP_MASK_L[2]) | ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
748: ((shuffle_result << 21) & FLIP_MASK_L[3]) | ((shuffle_result >> 21) & FLIP_MASK_R[3]);
749: end
750:
751: if (shuffle_mode[3]) begin
752: shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[0]) |
753: (((shuffle_result << 8) & SHUFFLE_MASK_L[0]) |
754: ((shuffle_result >> 8) & SHUFFLE_MASK_R[0]));
755: end
756: if (shuffle_mode[2]) begin
757: shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[1]) |
758: (((shuffle_result << 4) & SHUFFLE_MASK_L[1]) |
759: ((shuffle_result >> 4) & SHUFFLE_MASK_R[1]));
760: end
761: if (shuffle_mode[1]) begin
762: shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[2]) |
763: (((shuffle_result << 2) & SHUFFLE_MASK_L[2]) |
764: ((shuffle_result >> 2) & SHUFFLE_MASK_R[2]));
765: end
766: if (shuffle_mode[0]) begin
767: shuffle_result = (shuffle_result & SHUFFLE_MASK_NOT[3]) |
768: (((shuffle_result << 1) & SHUFFLE_MASK_L[3]) |
769: ((shuffle_result >> 1) & SHUFFLE_MASK_R[3]));
770: end
771:
772: if (shuffle_flip) begin
773: shuffle_result = (shuffle_result & 32'h8822_4411) |
774: ((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
775: ((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
776: ((shuffle_result << 15) & FLIP_MASK_L[2]) | ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
777: ((shuffle_result << 21) & FLIP_MASK_L[3]) | ((shuffle_result >> 21) & FLIP_MASK_R[3]);
778: end
779:
780: end
781: ///////////////////////////////////////////////////
782: // Carry-less Multiply + Cyclic Redundancy Check //
783: ///////////////////////////////////////////////////
784:
785: // Carry-less multiplication can be understood as multiplication based on
786: // the addition interpreted as the bit-wise xor operation.
787: //
788: // Example: 1101 X 1011 = 1111111:
789: //
790: // 1011 X 1101
791: // -----------
792: // 1101
793: // xor 1101
794: // ---------
795: // 10111
796: // xor 0000
797: // ----------
798: // 010111
799: // xor 1101
800: // -----------
801: // 1111111
802: //
803: // Architectural details:
804: // A 32 x 32-bit array
805: // [ operand_b[i] ? (operand_a << i) : '0 for i in 0 ... 31 ]
806: // is generated. The entries of the array are pairwise 'xor-ed'
807: // together in a 5-stage binary tree.
808: //
809: //
810: // Cyclic Redundancy Check:
811: //
812: // CRC-32 (CRC-32/ISO-HDLC) and CRC-32C (CRC-32/ISCSI) are directly implemented. For
813: // documentation of the crc configuration (crc-polynomials, initialization, reflection, etc.)
814: // see http://reveng.sourceforge.net/crc-catalogue/all.htm
815: // A useful guide to crc arithmetic and algorithms is given here:
816: // http://www.piclist.com/techref/method/math/crcguide.html.
817: //
818: // The CRC operation solves the following equation using binary polynomial arithmetic:
819: //
820: // rev(rd)(x) = rev(rs1)(x) * x**n mod {1, P}(x)
821: //
822: // where P denotes lower 32 bits of the corresponding CRC polynomial, rev(a) the bit reversal
823: // of a, n = 8,16, or 32 for .b, .h, .w -variants. {a, b} denotes bit concatenation.
824: //
825: // Using barret reduction, one can show that
826: //
827: // M(x) mod P(x) = R(x) =
828: // (M(x) * x**n) & {deg(P(x)'{1'b1}}) ^ (M(x) x**-(deg(P(x) - n)) cx mu(x) cx P(x),
829: //
830: // Where mu(x) = polydiv(x**64, {1,P}) & 0xffffffff. Here, 'cx' refers to carry-less
831: // multiplication. Substituting rev(rd)(x) for R(x) and rev(rs1)(x) for M(x) and solving for
832: // rd(x) with P(x) a crc32 polynomial (deg(P(x)) = 32), we get
833: //
834: // rd = rev( (rev(rs1) << n) ^ ((rev(rs1) >> (32-n)) cx mu cx P)
835: // = (rs1 >> n) ^ rev(rev( (rs1 << (32-n)) cx rev(mu)) cx P)
836: // ^-- cycle 0--------------------^
837: // ^- cycle 1 -------------------------------------------^
838: //
839: // In the last step we used the fact that carry-less multiplication is bit-order agnostic:
840: // rev(a cx b) = rev(a) cx rev(b).
841:
    // Carry-less multiplication datapath (CLMUL/CLMULR/CLMULH), also reused
    // by the CRC32/CRC32C instructions below.
    logic        clmul_rmode;             // result select: reversed product (CLMULR/CLMULH)
    logic        clmul_hmode;             // result select: upper product half (CLMULH)
    logic [31:0] clmul_op_a;              // multiplier operand a (possibly bit-reversed)
    logic [31:0] clmul_op_b;              // multiplier operand b (possibly bit-reversed)
    logic [31:0] operand_b_rev;           // bit-reversal of operand_b_i
    logic [31:0] clmul_and_stage[32];     // partial products, one per bit of op_b
    logic [31:0] clmul_xor_stage1[16];    // xor-reduction tree, level 1 (32 -> 16)
    logic [31:0] clmul_xor_stage2[8];     // level 2 (16 -> 8)
    logic [31:0] clmul_xor_stage3[4];     // level 3 (8 -> 4)
    logic [31:0] clmul_xor_stage4[2];     // level 4 (4 -> 2)

    logic [31:0] clmul_result_raw;
    logic [31:0] clmul_result_rev;

    // bit-reverse operand_b for the reversed-operand variants
    for (genvar i=0; i<32; i++) begin: gen_rev_operand_b
      assign operand_b_rev[i] = operand_b_i[31-i];
    end

    // CLMULR and CLMULH both feed bit-reversed operands into the multiplier
    assign clmul_rmode = operator_i == ALU_CLMULR;
    assign clmul_hmode = operator_i == ALU_CLMULH;
862:
    // CRC
    // Polynomials and bit-reversed Barrett constants for CRC-32 (ISO-HDLC)
    // and CRC-32C (iSCSI); mu = polydiv(x**64, {1,P}) & 0xffffffff, stored
    // bit-reversed (see derivation comment above).
    localparam logic [31:0] CRC32_POLYNOMIAL = 32'h04c1_1db7;
    localparam logic [31:0] CRC32_MU_REV = 32'hf701_1641;

    localparam logic [31:0] CRC32C_POLYNOMIAL = 32'h1edc_6f41;
    localparam logic [31:0] CRC32C_MU_REV = 32'hdea7_13f1;

    logic crc_op;     // current operator is any CRC32/CRC32C variant
    logic crc_hmode;  // half-word variant (.h)
    logic crc_bmode;  // byte variant (.b)

    logic crc_cpoly;  // use the CRC-32C polynomial / constants

    logic [31:0] crc_operand;  // operand_a with the processed bits MSB-aligned
    logic [31:0] crc_poly;     // selected polynomial
    logic [31:0] crc_mu_rev;   // selected bit-reversed Barrett constant

    assign crc_op = (operator_i == ALU_CRC32C_W) | (operator_i == ALU_CRC32_W) |
                    (operator_i == ALU_CRC32C_H) | (operator_i == ALU_CRC32_H) |
                    (operator_i == ALU_CRC32C_B) | (operator_i == ALU_CRC32_B);

    assign crc_cpoly = (operator_i == ALU_CRC32C_W) |
                       (operator_i == ALU_CRC32C_H) |
                       (operator_i == ALU_CRC32C_B);

    assign crc_hmode = (operator_i == ALU_CRC32_H) | (operator_i == ALU_CRC32C_H);
    assign crc_bmode = (operator_i == ALU_CRC32_B) | (operator_i == ALU_CRC32C_B);

    assign crc_poly = crc_cpoly ? CRC32C_POLYNOMIAL : CRC32_POLYNOMIAL;
    assign crc_mu_rev = crc_cpoly ? CRC32C_MU_REV : CRC32_MU_REV;

    // Left-align the bits taking part in the CRC: .b uses the low byte,
    // .h the low half-word, .w the full word.
    always_comb begin
      unique case(1'b1)
        crc_bmode: crc_operand = {operand_a_i[7:0], 24'h0};
        crc_hmode: crc_operand = {operand_a_i[15:0], 16'h0};
        default: crc_operand = operand_a_i;
      endcase
    end
901:
902: // Select clmul input
903: always_comb begin
904: if (crc_op) begin
905: clmul_op_a = instr_first_cycle_i ? crc_operand : imd_val_q_i;
906: clmul_op_b = instr_first_cycle_i ? crc_mu_rev : crc_poly;
907: end else begin
908: clmul_op_a = clmul_rmode | clmul_hmode ? operand_a_rev : operand_a_i;
909: clmul_op_b = clmul_rmode | clmul_hmode ? operand_b_rev : operand_b_i;
910: end
911: end
912:
913: for (genvar i=0; i<32; i++) begin : gen_clmul_and_op
914: assign clmul_and_stage[i] = clmul_op_b[i] ? clmul_op_a << i : '0;
915: end
916:
917: for (genvar i=0; i<16; i++) begin : gen_clmul_xor_op_l1
918: assign clmul_xor_stage1[i] = clmul_and_stage[2*i] ^ clmul_and_stage[2*i+1];
919: end
920:
921: for (genvar i=0; i<8; i++) begin : gen_clmul_xor_op_l2
922: assign clmul_xor_stage2[i] = clmul_xor_stage1[2*i] ^ clmul_xor_stage1[2*i+1];
923: end
924:
925: for (genvar i=0; i<4; i++) begin : gen_clmul_xor_op_l3
926: assign clmul_xor_stage3[i] = clmul_xor_stage2[2*i] ^ clmul_xor_stage2[2*i+1];
927: end
928:
929: for (genvar i=0; i<2; i++) begin : gen_clmul_xor_op_l4
930: assign clmul_xor_stage4[i] = clmul_xor_stage3[2*i] ^ clmul_xor_stage3[2*i+1];
931: end
932:
933: assign clmul_result_raw = clmul_xor_stage4[0] ^ clmul_xor_stage4[1];
934:
935: for (genvar i=0; i<32; i++) begin : gen_rev_clmul_result
936: assign clmul_result_rev[i] = clmul_result_raw[31-i];
937: end
938:
939: // clmulr_result = rev(clmul(rev(a), rev(b)))
940: // clmulh_result = clmulr_result >> 1
941: always_comb begin
942: case(1'b1)
943: clmul_rmode: clmul_result = clmul_result_rev;
944: clmul_hmode: clmul_result = {1'b0, clmul_result_rev[31:1]};
945: default: clmul_result = clmul_result_raw;
946: endcase
947: end
948:
    //////////////////////////////////////
    // Multicycle Bitmanip Instructions //
    //////////////////////////////////////
    // Ternary instructions + Shift Rotations + CRC
    // For ternary instructions (zbt), operand_a_i is tied to rs1 in the first cycle and rs3 in the
    // second cycle. operand_b_i is always tied to rs2.


    // Two-cycle instructions: the first cycle computes an intermediate value
    // and stores it (imd_val_we_o asserted); the second cycle combines it
    // with the freshly-muxed operands to form multicycle_result.
    always_comb begin
      unique case (operator_i)
        // cmov rd, rs2, rs1, rs3: rd = (rs2 != 0) ? rs1 : rs3
        // Cycle 0 stores rs1; cycle 1 selects between rs3 (now on
        // operand_a_i) and the stored rs1, based on rs2 (operand_b_i).
        ALU_CMOV: begin
          imd_val_d_o = operand_a_i;
          multicycle_result = (operand_b_i == 32'h0) ? operand_a_i : imd_val_q_i;
          if (instr_first_cycle_i) begin
            imd_val_we_o = 1'b1;
          end else begin
            imd_val_we_o = 1'b0;
          end
        end

        // cmix rd, rs2, rs1, rs3: rd = (rs1 & rs2) | (rs3 & ~rs2)
        // Cycle 0 stores one AND term, cycle 1 ORs the stored term with the
        // other. NOTE(review): bwlogic_and_result comes from the shared
        // bitwise-logic unit elsewhere in this module; confirm its operand
        // negation control produces rs3 & ~rs2 in the second cycle.
        ALU_CMIX: begin
          multicycle_result = imd_val_q_i | bwlogic_and_result;
          imd_val_d_o = bwlogic_and_result;
          if (instr_first_cycle_i) begin
            imd_val_we_o = 1'b1;
          end else begin
            imd_val_we_o = 1'b0;
          end
        end

        // Funnel shifts and rotates: each cycle produces one half of the
        // result via the shared shifter; the halves are ORed together.
        ALU_FSR, ALU_FSL,
        ALU_ROL, ALU_ROR: begin
          if (shift_amt[4:0] == 5'h0) begin
            // Shift amount 0 mod 32: select one operand directly instead of
            // ORing partial shifts. NOTE(review): shift_amt is produced
            // outside this block -- confirm bit 5 distinguishes which
            // operand provides the full result here.
            multicycle_result = shift_amt[5] ? operand_a_i : imd_val_q_i;
          end else begin
            multicycle_result = imd_val_q_i | shift_result;
          end
          imd_val_d_o = shift_result;
          if (instr_first_cycle_i) begin
            imd_val_we_o = 1'b1;
          end else begin
            imd_val_we_o = 1'b0;
          end
        end

        // CRC32/CRC32C: cycle 0 stores the reversed product of the aligned
        // operand and the mu constant; cycle 1 multiplies that by the
        // polynomial and XORs in the unprocessed upper bits of rs1
        // (none for the .w variants). See the Barrett derivation above.
        ALU_CRC32_W, ALU_CRC32C_W,
        ALU_CRC32_H, ALU_CRC32C_H,
        ALU_CRC32_B, ALU_CRC32C_B: begin
          imd_val_d_o = clmul_result_rev;
          unique case(1'b1)
            crc_bmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 8);
            crc_hmode: multicycle_result = clmul_result_rev ^ (operand_a_i >> 16);
            default: multicycle_result = clmul_result_rev;
          endcase
          if (instr_first_cycle_i) begin
            imd_val_we_o = 1'b1;
          end else begin
            imd_val_we_o = 1'b0;
          end
        end

        // Single-cycle operators: never write the intermediate register.
        default: begin
          imd_val_d_o = operand_a_i;
          imd_val_we_o = 1'b0;
          multicycle_result = operand_a_i;
        end
      endcase
    end
1017:
    /////////////////////////////
    // Single-bit Instructions //
    /////////////////////////////

    // Set/clear/invert/extract a single bit of rs1 using a one-hot mask.
    // NOTE(review): shift_result is produced by the shared shifter elsewhere
    // in this module -- it is assumed to carry 32'h1 << shift_amt for these
    // operators; confirm against the shifter control.
    always_comb begin
      unique case (operator_i)
        ALU_SBSET: singlebit_result = operand_a_i | shift_result;
        ALU_SBCLR: singlebit_result = operand_a_i & ~shift_result;
        ALU_SBINV: singlebit_result = operand_a_i ^ shift_result;
        default: singlebit_result = {31'h0, shift_result[0]}; // ALU_SBEXT
      endcase
    end
1030:
    ///////////////
    // Min / Max //
    ///////////////

    // NOTE(review): cmp_result is computed by the shared comparator earlier
    // in this module; for MIN/MAX/MINU/MAXU it is assumed to be 1 when
    // operand_a is the selected extremum -- confirm against the comparator.
    assign minmax_result = cmp_result ? operand_a_i : operand_b_i;
1036:
1037:
1038: //////////
1039: // Pack //
1040: //////////
1041:
1042: logic packu;
1043: logic packh;
1044: assign packu = operator_i == ALU_PACKU;
1045: assign packh = operator_i == ALU_PACKH;
1046:
1047: always_comb begin
1048: unique case (1'b1)
1049: packu: pack_result = {operand_b_i[31:16], operand_a_i[31:16]};
1050: packh: pack_result = {16'h0, operand_b_i[7:0], operand_a_i[7:0]};
1051: default: pack_result = {operand_b_i[15:0], operand_a_i[15:0]};
1052: endcase
1053: end
1054:
    //////////
    // Sext //
    //////////

    // Sign-extend the low byte (SEXTB) or the low half-word (SEXTH and any
    // other operator reaching this mux) of rs1.
    assign sext_result = (operator_i == ALU_SEXTB) ?
        { {24{operand_a_i[7]}}, operand_a_i[7:0]} : { {16{operand_a_i[15]}}, operand_a_i[15:0]};
1061:
  end else begin : g_no_alu_rvb
    // RV32B disabled: tie off every bitmanip result and support signal so
    // the result mux below sees constant zeros and the unused logic is
    // optimized away.
    // RV32B result signals
    assign minmax_result = '0;
    assign bitcnt_result = '0;
    assign pack_result = '0;
    assign sext_result = '0;
    assign multicycle_result = '0;
    assign singlebit_result = '0;
    assign shuffle_result = '0;
    assign butterfly_result = '0;
    assign invbutterfly_result = '0;
    assign clmul_result = '0;
    // RV32B support signals
    assign imd_val_d_o = '0;
    assign imd_val_we_o = '0;
  end
1078:
  ////////////////
  // Result mux //
  ////////////////

  // Flat decode of operator_i onto the per-unit result signals. result_o is
  // pre-assigned '0, so any operator not listed (and the default arm)
  // returns zero.
  always_comb begin
    result_o = '0;

    unique case (operator_i)
      // Bitwise Logic Operations (negate: RV32B)
      ALU_XOR, ALU_XNOR,
      ALU_OR, ALU_ORN,
      ALU_AND, ALU_ANDN: result_o = bwlogic_result;

      // Adder Operations
      ALU_ADD, ALU_SUB: result_o = adder_result;

      // Shift Operations
      ALU_SLL, ALU_SRL,
      ALU_SRA,
      // RV32B
      ALU_SLO, ALU_SRO: result_o = shift_result;

      // Shuffle Operations (RV32B)
      ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result;

      // Comparison Operations
      ALU_EQ, ALU_NE,
      ALU_GE, ALU_GEU,
      ALU_LT, ALU_LTU,
      ALU_SLT, ALU_SLTU: result_o = {31'h0,cmp_result};

      // MinMax Operations (RV32B)
      ALU_MIN, ALU_MAX,
      ALU_MINU, ALU_MAXU: result_o = minmax_result;

      // Bitcount Operations (RV32B)
      ALU_CLZ, ALU_CTZ,
      ALU_PCNT: result_o = {26'h0, bitcnt_result};

      // Pack Operations (RV32B)
      ALU_PACK, ALU_PACKH,
      ALU_PACKU: result_o = pack_result;

      // Sign-Extend (RV32B)
      ALU_SEXTB, ALU_SEXTH: result_o = sext_result;

      // Ternary Bitmanip Operations (RV32B)
      ALU_CMIX, ALU_CMOV,
      ALU_FSL, ALU_FSR,
      // Rotate Shift (RV32B)
      ALU_ROL, ALU_ROR,
      // Cyclic Redundancy Checks (RV32B)
      ALU_CRC32_W, ALU_CRC32C_W,
      ALU_CRC32_H, ALU_CRC32C_H,
      ALU_CRC32_B, ALU_CRC32C_B: result_o = multicycle_result;

      // Single-Bit Bitmanip Operations (RV32B)
      ALU_SBSET, ALU_SBCLR,
      ALU_SBINV, ALU_SBEXT: result_o = singlebit_result;

      // Bit Extract / Deposit (RV32B)
      ALU_BDEP: result_o = butterfly_result;
      ALU_BEXT: result_o = invbutterfly_result;

      // General Reverse / Or-combine (RV32B)
      ALU_GREV, ALU_GORC: result_o = butterfly_result;

      // Bit Field Place (RV32B)
      ALU_BFP: result_o = bfp_result;

      // Carry-less Multiply Operations (RV32B)
      ALU_CLMUL, ALU_CLMULR,
      ALU_CLMULH: result_o = clmul_result;

      default: ;
    endcase
  end
1156:
1157: endmodule
1158: