../src/lowrisc_ibex_ibex_icache_0.1/rtl/ibex_icache.sv Cov: 59.6%

   1: // Copyright lowRISC contributors.
   2: // Licensed under the Apache License, Version 2.0, see LICENSE for details.
   3: // SPDX-License-Identifier: Apache-2.0
   4: 
   5: /**
   6:  * Instruction cache
   7:  *
   8:  * Provides an instruction cache along with cache management, instruction buffering and prefetching
   9:  */
  10: 
  11: `include "prim_assert.sv"
  12: 
module ibex_icache #(
  // Cache arrangement parameters
  parameter int unsigned BusWidth       = 32,     // Instruction bus data width in bits
  parameter int unsigned CacheSizeBytes = 4*1024, // Total capacity across all ways
  parameter bit          ICacheECC      = 1'b0,   // Add SECDED ECC protection to tag/data RAMs
  parameter int unsigned LineSize       = 64,     // Cache line width in bits
  parameter int unsigned NumWays        = 2,      // Set associativity
  // Always make speculative bus requests in parallel with lookups
  parameter bit          SpecRequest    = 1'b0,
  // Only cache branch targets
  parameter bit          BranchCache    = 1'b0
) (
    // Clock and reset
    input  logic                clk_i,
    input  logic                rst_ni,

    // Signal that the core would like instructions
    input  logic                req_i,

    // Set the cache's address counter
    input  logic                branch_i,      // Committed redirect: capture addr_i
    input  logic                branch_spec_i, // Speculative redirect (may be suppressed if branch_i is low)
    input  logic [31:0]         addr_i,        // Redirect target; only valid while branch_i is high

    // IF stage interface: Pass fetched instructions to the core
    input  logic                ready_i,
    output logic                valid_o,
    output logic [31:0]         rdata_o,
    output logic [31:0]         addr_o,
    output logic                err_o,
    output logic                err_plus2_o,   // presumably flags an error on the upper halfword — confirm in output stage

    // Instruction memory / interconnect interface: Fetch instruction data from memory
    output logic                instr_req_o,
    input  logic                instr_gnt_i,
    output logic [31:0]         instr_addr_o,
    input  logic [BusWidth-1:0] instr_rdata_i,
    input  logic                instr_err_i,
    input  logic                instr_pmp_err_i, // PMP-faulted request: external request is masked
    input  logic                instr_rvalid_i,

    // Cache status
    input  logic                icache_enable_i,
    input  logic                icache_inval_i,
    output logic                busy_o
);
  59: 
  // NOTE: this RTL is a draft and may still change

  // Local constants
  localparam int unsigned ADDR_W       = 32;
  // Number of fill buffers (must be >= 2)
  localparam int unsigned NUM_FB       = 4;
  // Request throttling threshold
  localparam int unsigned FB_THRESHOLD = NUM_FB - 2;
  // Derived parameters
  localparam int unsigned LINE_SIZE_ECC   = ICacheECC ? (LineSize + 8) : LineSize; // +8 checkbits (72,64 SECDED)
  localparam int unsigned LINE_SIZE_BYTES = LineSize/8;
  localparam int unsigned LINE_W          = $clog2(LINE_SIZE_BYTES); // Byte-offset bits within a line
  localparam int unsigned BUS_BYTES       = BusWidth/8;
  localparam int unsigned BUS_W           = $clog2(BUS_BYTES);       // Byte-offset bits within a bus beat
  localparam int unsigned LINE_BEATS      = LINE_SIZE_BYTES / BUS_BYTES; // Bus beats per cache line
  localparam int unsigned LINE_BEATS_W    = $clog2(LINE_BEATS);
  localparam int unsigned NUM_LINES       = CacheSizeBytes / NumWays / LINE_SIZE_BYTES; // Lines per way
  localparam int unsigned INDEX_W         = $clog2(NUM_LINES);
  localparam int unsigned INDEX_HI        = INDEX_W + LINE_W - 1;    // MSB of the index field in an address
  localparam int unsigned TAG_SIZE        = ADDR_W - INDEX_W - LINE_W + 1; // 1 valid bit
  localparam int unsigned TAG_SIZE_ECC    = ICacheECC ? (TAG_SIZE + 6) : TAG_SIZE; // +6 checkbits (28,22 SECDED)
  localparam int unsigned OUTPUT_BEATS    = (BUS_BYTES / 2); // number of halfwords
  82: 
  // Prefetch signals
  logic [ADDR_W-1:0]                   lookup_addr_aligned;
  logic [ADDR_W-1:0]                   prefetch_addr_d, prefetch_addr_q;
  logic                                prefetch_addr_en;
  // Cache pipeline IC0 signals (lookup / fill arbitration into the RAMs)
  logic                                branch_suppress;
  logic                                lookup_throttle;
  logic                                lookup_req_ic0;
  logic [ADDR_W-1:0]                   lookup_addr_ic0;
  logic [INDEX_W-1:0]                  lookup_index_ic0;
  logic                                fill_req_ic0;
  logic [INDEX_W-1:0]                  fill_index_ic0;
  logic [TAG_SIZE-1:0]                 fill_tag_ic0;
  logic [LineSize-1:0]                 fill_wdata_ic0;
  logic                                lookup_grant_ic0;
  logic                                lookup_actual_ic0;
  logic                                fill_grant_ic0;
  logic                                tag_req_ic0;
  logic [INDEX_W-1:0]                  tag_index_ic0;
  logic [NumWays-1:0]                  tag_banks_ic0;
  logic                                tag_write_ic0;
  logic [TAG_SIZE_ECC-1:0]             tag_wdata_ic0;
  logic                                data_req_ic0;
  logic [INDEX_W-1:0]                  data_index_ic0;
  logic [NumWays-1:0]                  data_banks_ic0;
  logic                                data_write_ic0;
  logic [LINE_SIZE_ECC-1:0]            data_wdata_ic0;
  // Cache pipeline IC1 signals (RAM readout, tag compare, way selection)
  logic [TAG_SIZE_ECC-1:0]             tag_rdata_ic1  [NumWays];
  logic [LINE_SIZE_ECC-1:0]            data_rdata_ic1 [NumWays];
  logic [LINE_SIZE_ECC-1:0]            hit_data_ic1;
  logic                                lookup_valid_ic1;
  logic [ADDR_W-1:INDEX_HI+1]          lookup_addr_ic1;
  logic [NumWays-1:0]                  tag_match_ic1;
  logic                                tag_hit_ic1;
  logic [NumWays-1:0]                  tag_invalid_ic1;
  logic [NumWays-1:0]                  lowest_invalid_way_ic1;
  logic [NumWays-1:0]                  round_robin_way_ic1, round_robin_way_q;
  logic [NumWays-1:0]                  sel_way_ic1;
  logic                                ecc_err_ic1;
  logic                                ecc_write_req;
  logic [NumWays-1:0]                  ecc_write_ways;
  logic [INDEX_W-1:0]                  ecc_write_index;
  // Fill buffer signals
  logic                                gnt_or_pmp_err, gnt_not_pmp_err;
  logic [$clog2(NUM_FB)-1:0]           fb_fill_level;
  logic                                fill_cache_new;
  logic                                fill_new_alloc;
  logic                                fill_spec_req, fill_spec_done, fill_spec_hold;
  // fill_older_q[fb][i] set => buffer i is older than buffer fb (age matrix for arbitration)
  logic [NUM_FB-1:0][NUM_FB-1:0]       fill_older_d, fill_older_q;
  logic [NUM_FB-1:0]                   fill_alloc_sel, fill_alloc;
  logic [NUM_FB-1:0]                   fill_busy_d, fill_busy_q;
  logic [NUM_FB-1:0]                   fill_done;
  logic [NUM_FB-1:0]                   fill_in_ic1;
  logic [NUM_FB-1:0]                   fill_stale_d, fill_stale_q;
  logic [NUM_FB-1:0]                   fill_cache_d, fill_cache_q;
  logic [NUM_FB-1:0]                   fill_hit_ic1, fill_hit_d, fill_hit_q;
  // Counters are LINE_BEATS_W+1 bits wide: the MSB acts as a "count complete" flag
  logic [NUM_FB-1:0][LINE_BEATS_W:0]   fill_ext_cnt_d, fill_ext_cnt_q;
  logic [NUM_FB-1:0]                   fill_ext_hold_d, fill_ext_hold_q;
  logic [NUM_FB-1:0]                   fill_ext_done;
  logic [NUM_FB-1:0][LINE_BEATS_W:0]   fill_rvd_cnt_d, fill_rvd_cnt_q;
  logic [NUM_FB-1:0]                   fill_rvd_done;
  logic [NUM_FB-1:0]                   fill_ram_done_d, fill_ram_done_q;
  logic [NUM_FB-1:0]                   fill_out_grant;
  logic [NUM_FB-1:0][LINE_BEATS_W:0]   fill_out_cnt_d, fill_out_cnt_q;
  logic [NUM_FB-1:0]                   fill_out_done;
  logic [NUM_FB-1:0]                   fill_ext_req, fill_rvd_exp, fill_ram_req, fill_out_req;
  logic [NUM_FB-1:0]                   fill_data_sel, fill_data_reg, fill_data_hit, fill_data_rvd;
  logic [NUM_FB-1:0][LINE_BEATS_W-1:0] fill_ext_off, fill_rvd_off;
  logic [NUM_FB-1:0][LINE_BEATS_W:0]   fill_rvd_beat;
  logic [NUM_FB-1:0]                   fill_ext_arb, fill_ram_arb, fill_out_arb;
  logic [NUM_FB-1:0]                   fill_rvd_arb;
  logic [NUM_FB-1:0]                   fill_entry_en;
  logic [NUM_FB-1:0]                   fill_addr_en;
  logic [NUM_FB-1:0]                   fill_way_en;
  logic [NUM_FB-1:0][LINE_BEATS-1:0]   fill_data_en;
  logic [NUM_FB-1:0][LINE_BEATS-1:0]   fill_err_d, fill_err_q;
  logic [ADDR_W-1:0]                   fill_addr_q [NUM_FB];
  logic [NumWays-1:0]                  fill_way_q  [NUM_FB];
  logic [LineSize-1:0]                 fill_data_d [NUM_FB];
  logic [LineSize-1:0]                 fill_data_q [NUM_FB];
  logic [ADDR_W-1:BUS_W]               fill_ext_req_addr;
  logic [ADDR_W-1:0]                   fill_ram_req_addr;
  logic [NumWays-1:0]                  fill_ram_req_way;
  logic [LineSize-1:0]                 fill_ram_req_data;
  logic [LineSize-1:0]                 fill_out_data;
  logic [LINE_BEATS-1:0]               fill_out_err;
  // External req signals
  logic                                instr_req;
  logic [ADDR_W-1:BUS_W]               instr_addr;
  // Data output signals (skid buffer holds a halfword across output beats)
  logic                                skid_complete_instr;
  logic                                skid_ready;
  logic                                output_compressed;
  logic                                skid_valid_d, skid_valid_q, skid_en;
  logic [15:0]                         skid_data_d, skid_data_q;
  logic                                skid_err_q;
  logic                                output_valid;
  logic                                addr_incr_two;
  logic                                output_addr_en;
  logic [ADDR_W-1:1]                   output_addr_d, output_addr_q;
  logic [15:0]                         output_data_lo, output_data_hi;
  logic                                data_valid, output_ready;
  logic [LineSize-1:0]                 line_data;
  logic [LINE_BEATS-1:0]               line_err;
  logic [31:0]                         line_data_muxed;
  logic                                line_err_muxed;
  logic [31:0]                         output_data;
  logic                                output_err;
  // Invalidations (sequential walk through all indices)
  logic                                start_inval, inval_done;
  logic                                reset_inval_q;
  logic                                inval_prog_d, inval_prog_q;
  logic [INDEX_W-1:0]                  inval_index_d, inval_index_q;
 197: 
 198:   //////////////////////////
 199:   // Instruction prefetch //
 200:   //////////////////////////
 201: 
 202:   assign lookup_addr_aligned = {lookup_addr_ic0[ADDR_W-1:LINE_W],{LINE_W{1'b0}}};
 203: 
 204:   // The prefetch address increments by one cache line for each granted request.
 205:   // This address is also updated if there is a branch that is not granted, since the target
 206:   // address (addr_i) is only valid for one cycle while branch_i is high.
 207: 
 208:   // The captured branch target address is not forced to be aligned since the offset in the cache
 209:   // line must also be recorded for later use by the fill buffers.
 210:   assign prefetch_addr_d     =
 211:       lookup_grant_ic0 ? (lookup_addr_aligned + {{ADDR_W-LINE_W-1{1'b0}},1'b1,{LINE_W{1'b0}}}) :
 212:                          addr_i;
 213: 
 214:   assign prefetch_addr_en    = branch_i | lookup_grant_ic0;
 215: 
 216:   always_ff @(posedge clk_i) begin
 217:     if (prefetch_addr_en) begin
 218:       prefetch_addr_q <= prefetch_addr_d;
 219:     end
 220:   end
 221: 
 222:   ////////////////////////
 223:   // Pipeline stage IC0 //
 224:   ////////////////////////
 225: 
 226:   // Cache lookup
 227:   assign lookup_throttle  = (fb_fill_level > FB_THRESHOLD[$clog2(NUM_FB)-1:0]);
 228: 
 229:   assign lookup_req_ic0   = req_i & ~&fill_busy_q & (branch_i | ~lookup_throttle) & ~ecc_write_req;
 230:   assign lookup_addr_ic0  = branch_spec_i ? addr_i :
 231:                                             prefetch_addr_q;
 232:   assign lookup_index_ic0 = lookup_addr_ic0[INDEX_HI:LINE_W];
 233: 
 234:   // Cache write
 235:   assign fill_req_ic0   = (|fill_ram_req);
 236:   assign fill_index_ic0 = fill_ram_req_addr[INDEX_HI:LINE_W];
 237:   assign fill_tag_ic0   = {(~inval_prog_q & ~ecc_write_req),fill_ram_req_addr[ADDR_W-1:INDEX_HI+1]};
 238:   assign fill_wdata_ic0 = fill_ram_req_data;
 239: 
 240:   // Suppress a new lookup on a not-taken branch (as the address will be incorrect)
 241:   assign branch_suppress   = branch_spec_i & ~branch_i;
 242: 
 243:   // Arbitrated signals - lookups have highest priority
 244:   assign lookup_grant_ic0  = lookup_req_ic0 & ~branch_suppress;
 245:   assign fill_grant_ic0    = fill_req_ic0 & (~lookup_req_ic0 | branch_suppress) & ~inval_prog_q &
 246:                              ~ecc_write_req;
 247:   // Qualified lookup grant to mask ram signals in IC1 if access was not made
 248:   assign lookup_actual_ic0 = lookup_grant_ic0 & icache_enable_i & ~inval_prog_q & ~start_inval;
 249: 
 250:   // Tagram
 251:   assign tag_req_ic0   = lookup_req_ic0 | fill_req_ic0 | inval_prog_q | ecc_write_req;
 252:   assign tag_index_ic0 = inval_prog_q   ? inval_index_q :
 253:                          ecc_write_req  ? ecc_write_index :
 254:                          fill_grant_ic0 ? fill_index_ic0 :
 255:                                           lookup_index_ic0;
 256:   assign tag_banks_ic0 = ecc_write_req  ? ecc_write_ways :
 257:                          fill_grant_ic0 ? fill_ram_req_way :
 258:                                           {NumWays{1'b1}};
 259:   assign tag_write_ic0 = fill_grant_ic0 | inval_prog_q | ecc_write_req;
 260: 
 261:   // Dataram
 262:   assign data_req_ic0   = lookup_req_ic0 | fill_req_ic0;
 263:   assign data_index_ic0 = tag_index_ic0;
 264:   assign data_banks_ic0 = tag_banks_ic0;
 265:   assign data_write_ic0 = tag_write_ic0;
 266: 
  // Append ECC checkbits to write data if required
  if (ICacheECC) begin : gen_ecc_wdata

    // Tagram ECC
    // Reuse the same ecc encoding module for larger cache sizes by padding with zeros
    logic [21:0]          tag_ecc_input_padded;
    logic [27:0]          tag_ecc_output_padded;
    logic [22-TAG_SIZE:0] tag_ecc_output_unused;

    // Zero-pad the tag up to the encoder's fixed 22-bit payload width
    assign tag_ecc_input_padded  = {{22-TAG_SIZE{1'b0}},fill_tag_ic0};
    // Padding bits of the encoded output are discarded (tie-off for lint)
    assign tag_ecc_output_unused = tag_ecc_output_padded[21:TAG_SIZE-1];

    prim_secded_28_22_enc tag_ecc_enc (
      .in  (tag_ecc_input_padded),
      .out (tag_ecc_output_padded)
    );

    // Stored tag = 6 checkbits above the unpadded tag payload
    assign tag_wdata_ic0 = {tag_ecc_output_padded[27:22],tag_ecc_output_padded[TAG_SIZE-1:0]};

    // Dataram ECC
    prim_secded_72_64_enc data_ecc_enc (
      .in  (fill_wdata_ic0),
      .out (data_wdata_ic0)
    );

  end else begin : gen_noecc_wdata
    // No ECC: write the raw tag and line data
    assign tag_wdata_ic0  = fill_tag_ic0;
    assign data_wdata_ic0 = fill_wdata_ic0;
  end
 296: 
 297:   ////////////////
 298:   // IC0 -> IC1 //
 299:   ////////////////
 300: 
 301:   for (genvar way = 0; way < NumWays; way++) begin : gen_rams
 302:     // Tag RAM instantiation
 303:     prim_ram_1p #(
 304:       .Width           (TAG_SIZE_ECC),
 305:       .Depth           (NUM_LINES),
 306:       .DataBitsPerMask (TAG_SIZE_ECC)
 307:     ) tag_bank (
 308:       .clk_i    (clk_i),
 309:       .req_i    (tag_req_ic0 & tag_banks_ic0[way]),
 310:       .write_i  (tag_write_ic0),
 311:       .wmask_i  ({TAG_SIZE_ECC{1'b1}}),
 312:       .addr_i   (tag_index_ic0),
 313:       .wdata_i  (tag_wdata_ic0),
 314:       .rdata_o  (tag_rdata_ic1[way])
 315:     );
 316:     // Data RAM instantiation
 317:     prim_ram_1p #(
 318:       .Width           (LINE_SIZE_ECC),
 319:       .Depth           (NUM_LINES),
 320:       .DataBitsPerMask (LINE_SIZE_ECC)
 321:     ) data_bank (
 322:       .clk_i    (clk_i),
 323:       .req_i    (data_req_ic0 & data_banks_ic0[way]),
 324:       .write_i  (data_write_ic0),
 325:       .wmask_i  ({LINE_SIZE_ECC{1'b1}}),
 326:       .addr_i   (data_index_ic0),
 327:       .wdata_i  (data_wdata_ic0),
 328:       .rdata_o  (data_rdata_ic1[way])
 329:     );
 330:   end
 331: 
 332:   always_ff @(posedge clk_i or negedge rst_ni) begin
 333:     if (!rst_ni) begin
 334:       lookup_valid_ic1 <= 1'b0;
 335:     end else begin
 336:       lookup_valid_ic1 <= lookup_actual_ic0;
 337:     end
 338:   end
 339: 
 340:   always_ff @(posedge clk_i) begin
 341:     if (lookup_grant_ic0) begin
 342:       lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:INDEX_HI+1];
 343:       fill_in_ic1     <= fill_alloc_sel;
 344:     end
 345:   end
 346: 
 347:   ////////////////////////
 348:   // Pipeline stage IC1 //
 349:   ////////////////////////
 350: 
 351:   // Tag matching
 352:   for (genvar way = 0; way < NumWays; way++) begin : gen_tag_match
 353:     assign tag_match_ic1[way]   = (tag_rdata_ic1[way][TAG_SIZE-1:0] ==
 354:                                    {1'b1,lookup_addr_ic1[ADDR_W-1:INDEX_HI+1]});
 355:     assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][TAG_SIZE-1];
 356:   end
 357: 
 358:   assign tag_hit_ic1 = |tag_match_ic1;
 359: 
 360:   // Hit data mux
 361:   always_comb begin
 362:     hit_data_ic1 = 'b0;
 363:     for (int way = 0; way < NumWays; way++) begin
 364:       if (tag_match_ic1[way]) begin
 365:         hit_data_ic1 |= data_rdata_ic1[way];
 366:       end
 367:     end
 368:   end
 369: 
 370:   // Way selection for allocations to the cache (onehot signals)
 371:   // 1 first invalid way
 372:   // 2 global round-robin (pseudorandom) way
 373:   assign lowest_invalid_way_ic1[0] = tag_invalid_ic1[0];
 374:   assign round_robin_way_ic1[0]    = round_robin_way_q[NumWays-1];
 375:   for (genvar way = 1; way < NumWays; way++) begin : gen_lowest_way
 376:     assign lowest_invalid_way_ic1[way] = tag_invalid_ic1[way] & ~|tag_invalid_ic1[way-1:0];
 377:     assign round_robin_way_ic1[way]    = round_robin_way_q[way-1];
 378:   end
 379: 
 380:   always_ff @(posedge clk_i or negedge rst_ni) begin
 381:     if (!rst_ni) begin
 382:       round_robin_way_q <= {{NumWays-1{1'b0}},1'b1};
 383:     end else if (lookup_valid_ic1) begin
 384:       round_robin_way_q <= round_robin_way_ic1;
 385:     end
 386:   end
 387: 
 388:   assign sel_way_ic1 = |tag_invalid_ic1 ? lowest_invalid_way_ic1 :
 389:                                           round_robin_way_q;
 390: 
  // ECC checking logic
  if (ICacheECC) begin : gen_data_ecc_checking
    logic [NumWays-1:0] tag_err_ic1;
    logic [1:0]         data_err_ic1;
    logic               ecc_correction_write_d, ecc_correction_write_q;
    logic [NumWays-1:0] ecc_correction_ways_d, ecc_correction_ways_q;
    logic [INDEX_W-1:0] lookup_index_ic1, ecc_correction_index_q;

    // Tag ECC checking
    for (genvar way = 0; way < NumWays; way++) begin : gen_tag_ecc
      logic [1:0]  tag_err_bank_ic1;
      logic [27:0] tag_rdata_padded_ic1;

      // Expand the tag rdata with extra padding if the tag size is less than the maximum
      // (checkbits stay in the top 6 bits, matching the encoder layout in gen_ecc_wdata)
      assign tag_rdata_padded_ic1 = {tag_rdata_ic1[way][TAG_SIZE_ECC-1-:6],
                                     {22-TAG_SIZE{1'b0}},
                                     tag_rdata_ic1[way][TAG_SIZE-1:0]};

      prim_secded_28_22_dec data_ecc_dec (
        .in         (tag_rdata_padded_ic1),
        .d_o        (),
        .syndrome_o (),
        .err_o      (tag_err_bank_ic1)
      );
      // Any single- or double-bit error flags the way as erroneous
      assign tag_err_ic1[way] = |tag_err_bank_ic1;
    end

    // Data ECC checking
    // Note - could generate for all ways and mux after
    prim_secded_72_64_dec data_ecc_dec (
      .in         (hit_data_ic1),
      .d_o        (),
      .syndrome_o (),
      .err_o      (data_err_ic1)
    );

    // Only report errors for lookups that actually accessed the RAMs
    assign ecc_err_ic1 = lookup_valid_ic1 & ((|data_err_ic1) | (|tag_err_ic1));

    // Error correction
    // The way(s) producing the error will be invalidated in the next cycle.
    assign ecc_correction_ways_d  = tag_err_ic1 | (tag_match_ic1 & {NumWays{|data_err_ic1}});
    assign ecc_correction_write_d = ecc_err_ic1;

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        ecc_correction_write_q <= 1'b0;
      end else begin
        ecc_correction_write_q <= ecc_correction_write_d;
      end
    end

    // The index is required in IC1 only when ECC is configured so is registered here
    always_ff @(posedge clk_i) begin
      if (lookup_grant_ic0) begin
        lookup_index_ic1 <= lookup_addr_ic0[INDEX_HI-:INDEX_W];
      end
    end

    // Store the ways with errors to be invalidated
    always_ff @(posedge clk_i) begin
      if (ecc_err_ic1) begin
        ecc_correction_ways_q  <= ecc_correction_ways_d;
        ecc_correction_index_q <= lookup_index_ic1;
      end
    end

    // Drive the IC0 correction-write request (invalidation write, see fill_tag_ic0)
    assign ecc_write_req   = ecc_correction_write_q;
    assign ecc_write_ways  = ecc_correction_ways_q;
    assign ecc_write_index = ecc_correction_index_q;

  end else begin : gen_no_data_ecc
    // ECC disabled: no errors, no correction writes
    assign ecc_err_ic1     = 1'b0;
    assign ecc_write_req   = 1'b0;
    assign ecc_write_ways  = '0;
    assign ecc_write_index = '0;
  end
 467: 
 468:   ///////////////////////////////
 469:   // Cache allocation decision //
 470:   ///////////////////////////////
 471: 
 472:   if (BranchCache) begin : gen_caching_logic
 473: 
 474:     // Cache branch target + a number of subsequent lines
 475:     localparam int unsigned CACHE_AHEAD = 2;
 476:     localparam int unsigned CACHE_CNT_W = (CACHE_AHEAD == 1) ? 1 : $clog2(CACHE_AHEAD) + 1;
 477:     logic                   cache_cnt_dec;
 478:     logic [CACHE_CNT_W-1:0] cache_cnt_d, cache_cnt_q;
 479: 
 480:     assign cache_cnt_dec = lookup_grant_ic0 & (|cache_cnt_q);
 481:     assign cache_cnt_d   = branch_i ? CACHE_AHEAD[CACHE_CNT_W-1:0] :
 482:                                       (cache_cnt_q - {{CACHE_CNT_W-1{1'b0}},cache_cnt_dec});
 483: 
 484:     always_ff @(posedge clk_i or negedge rst_ni) begin
 485:       if (!rst_ni) begin
 486:         cache_cnt_q <= '0;
 487:       end else begin
 488:         cache_cnt_q <= cache_cnt_d;
 489:       end
 490:     end
 491: 
 492:     assign fill_cache_new = (branch_i | (|cache_cnt_q)) & icache_enable_i &
 493:                             ~icache_inval_i & ~inval_prog_q;
 494: 
 495:   end else begin : gen_cache_all
 496: 
 497:     // Cache all missing fetches
 498:     assign fill_cache_new = icache_enable_i & ~start_inval & ~inval_prog_q;
 499:   end
 500: 
 501:   //////////////////////////
 502:   // Fill buffer tracking //
 503:   //////////////////////////
 504: 
 505:   always_comb begin
 506:     fb_fill_level = '0;
 507:     for (int i = 0; i < NUM_FB; i++) begin
 508:       if (fill_busy_q[i] & ~fill_stale_q[i]) begin
 509:         fb_fill_level += {{$clog2(NUM_FB)-1{1'b0}},1'b1};
 510:       end
 511:     end
 512:   end
 513: 
  // PMP errors might not / don't need to be granted (since the external request is masked)
  assign gnt_or_pmp_err  = instr_gnt_i | instr_pmp_err_i;
  assign gnt_not_pmp_err = instr_gnt_i & ~instr_pmp_err_i;
  // Allocate a new buffer for every granted lookup
  assign fill_new_alloc = lookup_grant_ic0;
  // Track whether a speculative external request was made from IC0, and whether it was granted
  // Speculative requests are only made when no existing fill buffer has a pending request
  assign fill_spec_req  = (SpecRequest | branch_i) & ~|fill_ext_req;
  assign fill_spec_done = fill_spec_req & gnt_not_pmp_err;
  // Hold the request until it is either granted or masked by a PMP error
  assign fill_spec_hold = fill_spec_req & ~gnt_or_pmp_err;
 523: 
 524:   for (genvar fb = 0; fb < NUM_FB; fb++) begin : gen_fbs
 525: 
 526:     /////////////////////////////
 527:     // Fill buffer allocations //
 528:     /////////////////////////////
 529: 
 530:     // Allocate the lowest available buffer
 531:     if (fb == 0) begin : gen_fb_zero
 532:       assign fill_alloc_sel[fb] = ~fill_busy_q[fb];
 533:     end else begin : gen_fb_rest
 534:       assign fill_alloc_sel[fb] = ~fill_busy_q[fb] & (&fill_busy_q[fb-1:0]);
 535:     end
 536: 
 537:     assign fill_alloc[fb]      = fill_alloc_sel[fb] & fill_new_alloc;
 538:     assign fill_busy_d[fb]     = fill_alloc[fb] | (fill_busy_q[fb] & ~fill_done[fb]);
 539: 
 540:     // Track which other fill buffers are older than this one (for age-based arbitration)
 541:     // TODO sparsify
 542:     assign fill_older_d[fb]    = (fill_alloc[fb] ? fill_busy_q : fill_older_q[fb]) & ~fill_done;
 543: 
 544:     // A fill buffer can release once all its actions are completed
 545:                                  // all data written to the cache (unless hit or error)
 546:     assign fill_done[fb]       = (fill_ram_done_q[fb] | fill_hit_q[fb] | ~fill_cache_q[fb] |
 547:                                   (|fill_err_q[fb])) &
 548:                                  // all data output unless stale due to intervening branch
 549:                                  (fill_out_done[fb] | fill_stale_q[fb] | branch_i) &
 550:                                  // all external requests completed
 551:                                  fill_rvd_done[fb];
 552: 
 553:     /////////////////////////////////
 554:     // Fill buffer status tracking //
 555:     /////////////////////////////////
 556: 
 557:     // Track staleness (requests become stale when a branch intervenes)
 558:     assign fill_stale_d[fb]    = fill_busy_q[fb] & (branch_i | fill_stale_q[fb]);
 559:     // Track whether or not this request should allocate to the cache
 560:     // Any invalidation or disabling of the cache while the buffer is busy will stop allocation
 561:     assign fill_cache_d[fb]    = (fill_alloc[fb] & fill_cache_new) |
 562:                                  (fill_cache_q[fb] & fill_busy_q[fb] &
 563:                                   icache_enable_i & ~icache_inval_i);
 564:     // Record whether the request hit in the cache
 565:     assign fill_hit_ic1[fb]    = lookup_valid_ic1 & fill_in_ic1[fb] & tag_hit_ic1;
 566:     assign fill_hit_d[fb]      = (fill_hit_ic1[fb] & ~ecc_err_ic1) |
 567:                                  (fill_hit_q[fb] & fill_busy_q[fb]);
 568: 
 569:     ///////////////////////////////////////////
 570:     // Fill buffer external request tracking //
 571:     ///////////////////////////////////////////
 572: 
 573:     // Make an external request
 574:     assign fill_ext_req[fb]    = fill_busy_q[fb] & ~fill_ext_done[fb];
 575: 
 576:     // Count the number of completed external requests (each line requires LINE_BEATS requests)
 577:     // Don't count fake PMP error grants here since they will never receive an rvalid response
 578:     assign fill_ext_cnt_d[fb]  = fill_alloc[fb] ?
 579:                                    {{LINE_BEATS_W{1'b0}},fill_spec_done} :
 580:                                    (fill_ext_cnt_q[fb] + {{LINE_BEATS_W{1'b0}},
 581:                                                           fill_ext_arb[fb] & gnt_not_pmp_err});
 582:     // External request must be held until granted
 583:     assign fill_ext_hold_d[fb] = (fill_alloc[fb] & fill_spec_hold) |
 584:                                  (fill_ext_arb[fb] & ~gnt_or_pmp_err);
 585:     // External requests are completed when the counter is filled or when the request is cancelled
 586:     assign fill_ext_done[fb]   = (fill_ext_cnt_q[fb][LINE_BEATS_W] |
 587:                                   // external requests are considered complete if the request hit
 588:                                   (fill_hit_ic1[fb] & ~ecc_err_ic1) | fill_hit_q[fb] |
 589:                                   // external requests will stop once any PMP error is received
 590:                                   fill_err_q[fb][fill_ext_off[fb]] |
 591:                                   // cancel if the line is stale and won't be cached
 592:                                   (~fill_cache_q[fb] & (branch_i | fill_stale_q[fb]))) &
 593:                                  // can't cancel while we are waiting for a grant on the bus
 594:                                  ~fill_ext_hold_q[fb];
 595:     // Track whether this fill buffer expects to receive beats of data
 596:     assign fill_rvd_exp[fb]    = fill_busy_q[fb] & ~fill_rvd_done[fb];
 597:     // Count the number of rvalid beats received
 598:     assign fill_rvd_cnt_d[fb]  = fill_alloc[fb] ? '0 :
 599:                                                   (fill_rvd_cnt_q[fb] +
 600:                                                    {{LINE_BEATS_W{1'b0}},fill_rvd_arb[fb]});
 601:     // External data is complete when all issued external requests have received their data
 602:     assign fill_rvd_done[fb]   = fill_ext_done[fb] & (fill_rvd_cnt_q[fb] == fill_ext_cnt_q[fb]);
 603: 
 604:     //////////////////////////////////////
 605:     // Fill buffer data output tracking //
 606:     //////////////////////////////////////
 607: 
 608:     // Send data to the IF stage for requests that are not stale, have not completed their
 609:     // data output, and have data available to send.
 610:     // Data is available if:
 611:     // - The request hit in the cache
 612:     // - The current beat is an error (since a PMP error might not actually receive any data)
 613:     // - Buffered data is available (fill_rvd_cnt_q is ahead of fill_out_cnt_q)
 614:     // - Data is available from the bus this cycle (fill_rvd_arb)
 615:     assign fill_out_req[fb]    = fill_busy_q[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
 616:                                  (fill_hit_ic1[fb] | fill_hit_q[fb] |
 617:                                   (fill_err_q[fb][fill_out_cnt_q[fb][LINE_BEATS_W-1:0]]) |
 618:                                   (fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_rvd_arb[fb]);
 619: 
 620:     // Calculate when a beat of data is output. Any ECC error squashes the output that cycle.
 621:     assign fill_out_grant[fb]  = fill_out_arb[fb] & output_ready & ~ecc_err_ic1;
 622: 
 623:     // Count the beats of data output to the IF stage
 624:     assign fill_out_cnt_d[fb]  = fill_alloc[fb] ? {1'b0,lookup_addr_ic0[LINE_W-1:BUS_W]} :
 625:                                                   (fill_out_cnt_q[fb] +
 626:                                                    {{LINE_BEATS_W{1'b0}},fill_out_grant[fb]});
 627:     // Data output complete when the counter fills
 628:     assign fill_out_done[fb]   = fill_out_cnt_q[fb][LINE_BEATS_W];
 629: 
 630:     //////////////////////////////////////
 631:     // Fill buffer ram request tracking //
 632:     //////////////////////////////////////
 633: 
 634:                                  // make a fill request once all data beats received
 635:     assign fill_ram_req[fb]    = fill_busy_q[fb] & fill_rvd_cnt_q[fb][LINE_BEATS_W] &
 636:                                  // unless the request hit, was non-allocating or got an error
 637:                                  ~fill_hit_q[fb] & fill_cache_q[fb] & ~|fill_err_q[fb] &
 638:                                  // or the request was already completed
 639:                                  ~fill_ram_done_q[fb];
 640: 
 641:     // Record when a cache allocation request has been completed
 642:     assign fill_ram_done_d[fb] = fill_ram_arb[fb] | (fill_ram_done_q[fb] & fill_busy_q[fb]);
 643: 
 644:     //////////////////////////////
 645:     // Fill buffer line offsets //
 646:     //////////////////////////////
 647: 
 648:     // When we branch into the middle of a line, the output count will not start from zero. This
 649:     // beat count is used to know which incoming rdata beats are relevant.
 650:     assign fill_rvd_beat[fb]   = {1'b0,fill_addr_q[fb][LINE_W-1:BUS_W]} +
 651:                                  fill_rvd_cnt_q[fb][LINE_BEATS_W:0];
 652:     assign fill_ext_off[fb]    = fill_addr_q[fb][LINE_W-1:BUS_W] +
 653:                                  fill_ext_cnt_q[fb][LINE_BEATS_W-1:0];
 654:     assign fill_rvd_off[fb]    = fill_rvd_beat[fb][LINE_BEATS_W-1:0];
 655: 
 656:     /////////////////////////////
 657:     // Fill buffer arbitration //
 658:     /////////////////////////////
 659: 
 660:     // Age based arbitration - all these signals are one-hot
 661:     assign fill_ext_arb[fb]    = fill_ext_req[fb] & ~|(fill_ext_req & fill_older_q[fb]);
 662:     assign fill_ram_arb[fb]    = fill_ram_req[fb] & fill_grant_ic0 & ~|(fill_ram_req & fill_older_q[fb]);
 663:     // Calculate which fill buffer is the oldest one which still needs to output data to IF
 664:     assign fill_data_sel[fb]   = ~|(fill_busy_q & ~fill_out_done & ~fill_stale_q &
 665:                                     fill_older_q[fb]);
 666:     // Arbitrate the request which has data available to send, and is the oldest outstanding
 667:     assign fill_out_arb[fb]    = fill_out_req[fb] & fill_data_sel[fb];
 668:     // Assign incoming rvalid data to the oldest fill buffer expecting it
 669:     assign fill_rvd_arb[fb]    = instr_rvalid_i & fill_rvd_exp[fb] & ~|(fill_rvd_exp & fill_older_q[fb]);
 670: 
 671:     /////////////////////////////
 672:     // Fill buffer data muxing //
 673:     /////////////////////////////
 674: 
 675:     // Output data muxing controls
 676:     // 1. Select data from the fill buffer data register
 677:     assign fill_data_reg[fb]   = fill_busy_q[fb] & ~fill_stale_q[fb] &
 678:                                  ~fill_out_done[fb] & fill_data_sel[fb] &
 679:     //                           The incoming data is already ahead of the output count
 680:                                  ((fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_hit_q[fb] |
 681:                                   (|fill_err_q[fb]));
 682:     // 2. Select IC1 hit data
 683:     assign fill_data_hit[fb]   = fill_busy_q[fb] & fill_hit_ic1[fb] & fill_data_sel[fb];
 684:     // 3. Select incoming instr_rdata_i
 685:     assign fill_data_rvd[fb]   = fill_busy_q[fb] & fill_rvd_arb[fb] & ~fill_hit_q[fb] &
 686:                                  ~fill_hit_ic1[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
 687:     //                           The incoming data lines up with the output count
 688:                                  (fill_rvd_beat[fb] == fill_out_cnt_q[fb]) & fill_data_sel[fb];
 689: 
 690: 
 691:     ///////////////////////////
 692:     // Fill buffer registers //
 693:     ///////////////////////////
 694: 
 695:     // Fill buffer general enable
 696:     assign fill_entry_en[fb]   = fill_alloc[fb] | fill_busy_q[fb];
 697: 
 698:     always_ff @(posedge clk_i or negedge rst_ni) begin
 699:       if (!rst_ni) begin
 700:         fill_busy_q[fb]     <= 1'b0;
 701:         fill_older_q[fb]    <= '0;
 702:         fill_stale_q[fb]    <= 1'b0;
 703:         fill_cache_q[fb]    <= 1'b0;
 704:         fill_hit_q[fb]      <= 1'b0;
 705:         fill_ext_cnt_q[fb]  <= '0;
 706:         fill_ext_hold_q[fb] <= 1'b0;
 707:         fill_rvd_cnt_q[fb]  <= '0;
 708:         fill_ram_done_q[fb] <= 1'b0;
 709:         fill_out_cnt_q[fb]  <= '0;
 710:       end else if (fill_entry_en[fb]) begin
 711:         fill_busy_q[fb]     <= fill_busy_d[fb];
 712:         fill_older_q[fb]    <= fill_older_d[fb];
 713:         fill_stale_q[fb]    <= fill_stale_d[fb];
 714:         fill_cache_q[fb]    <= fill_cache_d[fb];
 715:         fill_hit_q[fb]      <= fill_hit_d[fb];
 716:         fill_ext_cnt_q[fb]  <= fill_ext_cnt_d[fb];
 717:         fill_ext_hold_q[fb] <= fill_ext_hold_d[fb];
 718:         fill_rvd_cnt_q[fb]  <= fill_rvd_cnt_d[fb];
 719:         fill_ram_done_q[fb] <= fill_ram_done_d[fb];
 720:         fill_out_cnt_q[fb]  <= fill_out_cnt_d[fb];
 721:       end
 722:     end
 723: 
 724:     ////////////////////////////////////////
 725:     // Fill buffer address / data storage //
 726:     ////////////////////////////////////////
 727: 
 728:     assign fill_addr_en[fb]    = fill_alloc[fb];
 729:     assign fill_way_en[fb]     = (lookup_valid_ic1 & fill_in_ic1[fb]);
 730: 
 731:     always_ff @(posedge clk_i) begin
 732:       if (fill_addr_en[fb]) begin
 733:         fill_addr_q[fb] <= lookup_addr_ic0;
 734:       end
 735:     end
 736: 
 737:     always_ff @(posedge clk_i) begin
 738:       if (fill_way_en[fb]) begin
 739:         fill_way_q[fb]  <= sel_way_ic1;
 740:       end
 741:     end
 742: 
 743:     // Data either comes from the cache or the bus. If there was an ECC error, we must take
 744:     // the incoming bus data since the cache hit data is corrupted.
 745:     assign fill_data_d[fb] = (fill_hit_ic1[fb] & ~ecc_err_ic1) ? hit_data_ic1[LineSize-1:0] :
 746:                                                                  {LINE_BEATS{instr_rdata_i}};
 747: 
    for (genvar b = 0; b < LINE_BEATS; b++) begin : gen_data_buf
      // Error tracking (per beat)
      // An error is recorded against the specific beat it occurred on and then held until
      // the buffer is freed.
      //                           Either a PMP error on a speculative request,
      assign fill_err_d[fb][b]   = (instr_pmp_err_i & fill_alloc[fb] & fill_spec_req &
                                    (lookup_addr_ic0[LINE_W-1:BUS_W] == b[LINE_BEATS_W-1:0])) |
      //                           a PMP error on a fill buffer ext req
                                   (instr_pmp_err_i & fill_ext_arb[fb] &
                                    (fill_ext_off[fb] == b[LINE_BEATS_W-1:0])) |
      //                           Or a data error with instr_rvalid_i
                                   (fill_rvd_arb[fb] & instr_err_i &
                                    (fill_rvd_off[fb] == b[LINE_BEATS_W-1:0])) |
      //                           Hold the error once recorded
                                   (fill_busy_q[fb] & fill_err_q[fb][b]);

      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          fill_err_q[fb][b] <= '0;
        end else if (fill_entry_en[fb]) begin
          fill_err_q[fb][b] <= fill_err_d[fb][b];
        end
      end

      // Enable the relevant part of the data register (or all for cache hits)
      // Ignore incoming rvalid data when we already have cache hit data
      assign fill_data_en[fb][b] = fill_hit_ic1[fb] |
                                   (fill_rvd_arb[fb] & ~fill_hit_q[fb] &
                                    (fill_rvd_off[fb] == b[LINE_BEATS_W-1:0]));

      // No reset on the data register itself; validity is tracked by the state above
      always_ff @(posedge clk_i) begin
        if (fill_data_en[fb][b]) begin
          fill_data_q[fb][b*BusWidth+:BusWidth] <= fill_data_d[fb][b*BusWidth+:BusWidth];
        end
      end

    end
 783:   end
 784: 
 785:   ////////////////////////////////
 786:   // Fill buffer one-hot muxing //
 787:   ////////////////////////////////
 788: 
 789:   // External req info
 790:   always_comb begin
 791:     fill_ext_req_addr = '0;
 792:     for (int i = 0; i < NUM_FB; i++) begin
 793:       if (fill_ext_arb[i]) begin
 794:         fill_ext_req_addr |= {fill_addr_q[i][ADDR_W-1:LINE_W], fill_ext_off[i]};
 795:       end
 796:     end
 797:   end
 798: 
 799:   // Cache req info
 800:   always_comb begin
 801:     fill_ram_req_addr = '0;
 802:     fill_ram_req_way  = '0;
 803:     fill_ram_req_data = '0;
 804:     for (int i = 0; i < NUM_FB; i++) begin
 805:       if (fill_ram_arb[i]) begin
 806:         fill_ram_req_addr |= fill_addr_q[i];
 807:         fill_ram_req_way  |= fill_way_q[i];
 808:         fill_ram_req_data |= fill_data_q[i];
 809:       end
 810:     end
 811:   end
 812: 
 813:   // IF stage output data
 814:   always_comb begin
 815:     fill_out_data = '0;
 816:     fill_out_err  = '0;
 817:     for (int i = 0; i < NUM_FB; i++) begin
 818:       if (fill_data_reg[i]) begin
 819:         fill_out_data |= fill_data_q[i];
 820:         // Ignore any speculative errors accumulated on cache hits
 821:         fill_out_err  |= (fill_err_q[i] & ~{LINE_BEATS{fill_hit_q[i]}});
 822:       end
 823:     end
 824:   end
 825: 
 826:   ///////////////////////
 827:   // External requests //
 828:   ///////////////////////
 829: 
 830:   assign instr_req  = ((SpecRequest | branch_i) & lookup_grant_ic0) |
 831:                       |fill_ext_req;
 832: 
 833:   assign instr_addr = |fill_ext_req ? fill_ext_req_addr :
 834:                                       lookup_addr_ic0[ADDR_W-1:BUS_W];
 835: 
 836:   assign instr_req_o  = instr_req;
 837:   assign instr_addr_o = {instr_addr[ADDR_W-1:BUS_W],{BUS_W{1'b0}}};
 838: 
 839:   ////////////////////////
 840:   // Output data muxing //
 841:   ////////////////////////
 842: 
 843:   // Mux between line-width data sources
 844:   assign line_data = |fill_data_hit ? hit_data_ic1[LineSize-1:0] : fill_out_data;
 845:   assign line_err  = |fill_data_hit ? {LINE_BEATS{1'b0}} : fill_out_err;
 846: 
 847:   // Mux the relevant beat of line data, based on the output address
 848:   always_comb begin
 849:     line_data_muxed = '0;
 850:     line_err_muxed  = 1'b0;
 851:     for (int i = 0; i < LINE_BEATS; i++) begin
 852:       // When data has been skidded, the output address is behind by one
 853:       if ((output_addr_q[LINE_W-1:BUS_W] + {{LINE_BEATS_W-1{1'b0}},skid_valid_q}) ==
 854:           i[LINE_BEATS_W-1:0]) begin
 855:         line_data_muxed |= line_data[i*32+:32];
 856:         line_err_muxed  |= line_err[i];
 857:       end
 858:     end
 859:   end
 860: 
  // Mux between incoming rdata and the muxed line data
  assign output_data = |fill_data_rvd ? instr_rdata_i : line_data_muxed;
  assign output_err  = |fill_data_rvd ? instr_err_i   : line_err_muxed;

  // Output data is valid (from any of the three possible sources). Note that fill_out_arb
  // must be used here rather than fill_out_req because data can become valid out of order
  // (e.g. cache hit data can become available ahead of an older outstanding miss).
  // Any ECC error suppresses the output that cycle.
  assign data_valid = |fill_out_arb & ~ecc_err_ic1;

  // Skid buffer data
  // Holds the upper halfword of the current output word so that an uncompressed instruction
  // straddling two words can be assembled across cycles.
  assign skid_data_d = output_data[31:16];

  // Load the skid buffer whenever valid output data is being consumed or skidded
  assign skid_en     = data_valid & (ready_i | skid_ready);

  // No reset: skid_data_q / skid_err_q are qualified by skid_valid_q before use
  always_ff @(posedge clk_i) begin
    if (skid_en) begin
      skid_data_q <= skid_data_d;
      skid_err_q  <= output_err;
    end
  end

  // The data in the skid buffer is ready if it's a complete compressed instruction or if there's
  // an error (no need to wait for the second half)
  assign skid_complete_instr = skid_valid_q & ((skid_data_q[1:0] != 2'b11) | skid_err_q);

  // Data can be loaded into the skid buffer for an unaligned uncompressed instruction
  assign skid_ready = output_addr_q[1] & ~skid_valid_q & (~output_compressed | output_err);

  assign output_ready = (ready_i | skid_ready) & ~skid_complete_instr;

  // RISC-V compressed (RVC) instructions have opcode bits [1:0] != 2'b11
  assign output_compressed = (rdata_o[1:0] != 2'b11);

  assign skid_valid_d =
      // Branches invalidate the skid buffer
      branch_i      ? 1'b0 :
      // Once valid, the skid buffer stays valid until a compressed instruction realigns the stream
      (skid_valid_q ? ~(ready_i & ((skid_data_q[1:0] != 2'b11) | skid_err_q)) :
      // The skid buffer becomes valid when:
                        // - we branch to an unaligned uncompressed instruction
                      (((output_addr_q[1] & (~output_compressed | output_err)) |
                        // - a compressed instruction misaligns the stream
                        (~output_addr_q[1] & output_compressed & ~output_err & ready_i)) & data_valid));
 904: 
  // Skid buffer valid flag (reset, since it qualifies the un-reset skid data registers)
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      skid_valid_q <= 1'b0;
    end else begin
      skid_valid_q <= skid_valid_d;
    end
  end
 912: 
  // Signal that valid data is available to the IF stage
  // Note that if the first half of an unaligned instruction reports an error, we do not need
  // to wait for the second half (and for PMP errors we might not have fetched the second half)
                        // Compressed instruction completely satisfied by skid buffer
  assign output_valid = skid_complete_instr |
                        // Output data available and, output stream aligned, or skid data available,
                        (data_valid & (~output_addr_q[1] | skid_valid_q |
                                       // or this is an error or an unaligned compressed instruction
                                       output_err | (output_data[17:16] != 2'b11)));

  // Update the address on branches and every time an instruction is driven
  assign output_addr_en = branch_i | (ready_i & valid_o);

  // Increment the address by two every time a compressed instruction is popped
  // (never by two when the output is an error)
  assign addr_incr_two = output_compressed & ~err_o;

  assign output_addr_d = branch_i ? addr_i[31:1] :
                                    (output_addr_q[31:1] +
                                     // Increment address by 4 or 2
                                     {29'd0, ~addr_incr_two, addr_incr_two});

  // NOTE(review): output_addr_q has no reset - presumably relies on a branch initialising it
  // before the first fetch is driven; confirm.
  always_ff @(posedge clk_i) begin
    if (output_addr_en) begin
      output_addr_q <= output_addr_d;
    end
  end
 939: 
  // Mux the data from BusWidth to halfword
  // This muxing realigns data when instruction words are split across BUS_W e.g.
  // word 1 |----|*h1*|
  // word 0 |*h0*|----| --> |*h1*|*h0*|
  //        31   15   0     31   15   0
  // Low halfword: select the 16-bit lane addressed by the halfword-offset address bits
  always_comb begin
    output_data_lo = '0;
    for (int i = 0; i < OUTPUT_BEATS; i++) begin
      if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
        output_data_lo |= output_data[i*16+:16];
      end
    end
  end

  // High halfword: the lane one above the addressed one. When the address points at the
  // topmost halfword of the word, wrap to the bottom lane (the stream has been skidded, so
  // output_data then holds the following word's data).
  always_comb begin
    output_data_hi = '0;
    for (int i = 0; i < OUTPUT_BEATS-1; i++) begin
      if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
        output_data_hi |= output_data[(i+1)*16+:16];
      end
    end
    if (&output_addr_q[BUS_W-1:1]) begin
      output_data_hi |= output_data[15:0];
    end
  end
 965: 
  assign valid_o     = output_valid;
  // When the skid buffer holds the first half of an instruction it supplies the low halfword
  assign rdata_o     = {output_data_hi, (skid_valid_q ? skid_data_q : output_data_lo)};
  assign addr_o      = {output_addr_q, 1'b0};
  // Error from the skid buffer, or (when the skid buffer isn't satisfying the fetch alone)
  // from the current output data
  assign err_o       = (skid_valid_q & skid_err_q) | (~skid_complete_instr & output_err);
  // Error caused by the second half of a misaligned uncompressed instruction
  // (only relevant when err_o is set)
  assign err_plus2_o = skid_valid_q & ~skid_err_q;
 973: 
 974:   ///////////////////
 975:   // Invalidations //
 976:   ///////////////////
 977: 
 978:   // Invalidate on reset, or when instructed. If an invalidation request is received while a
 979:   // previous invalidation is ongoing, it does not need to be restarted.
 980:   assign start_inval   = (~reset_inval_q | icache_inval_i) & ~inval_prog_q;
 981:   assign inval_prog_d  = start_inval | (inval_prog_q & ~inval_done);
 982:   assign inval_done    = &inval_index_q;
 983:   assign inval_index_d = start_inval ? '0 :
 984:                                        (inval_index_q + {{INDEX_W-1{1'b0}},1'b1});
 985: 
 986:   always_ff @(posedge clk_i or negedge rst_ni) begin
 987:     if (!rst_ni) begin
 988:       inval_prog_q  <= 1'b0;
 989:       reset_inval_q <= 1'b0;
 990:     end else begin
 991:       inval_prog_q  <= inval_prog_d;
 992:       reset_inval_q <= 1'b1;
 993:     end
 994:   end
 995: 
 996:   always_ff @(posedge clk_i) begin
 997:     if (inval_prog_d) begin
 998:       inval_index_q <= inval_index_d;
 999:     end
1000:   end
1001: 
1002:   /////////////////
1003:   // Busy status //
1004:   /////////////////
1005: 
1006:   // Only busy (for WFI purposes) while an invalidation is in-progress, or external requests are
1007:   // outstanding.
1008:   assign busy_o = inval_prog_q | (|(fill_busy_q & ~fill_rvd_done));
1009: 
1010:   ////////////////
1011:   // Assertions //
1012:   ////////////////
1013: 
1014:   `ASSERT_INIT(size_param_legal, (LineSize > 32))
1015: 
1016:   // ECC primitives will need to be changed for different sizes
1017:   `ASSERT_INIT(ecc_tag_param_legal, (TAG_SIZE <= 27))
1018:   `ASSERT_INIT(ecc_data_param_legal, (LineSize <= 121))
1019: 
1020: endmodule
1021: