../src/lowrisc_ibex_ibex_icache_0.1/rtl/ibex_icache.sv Cov: 59.6%
1: // Copyright lowRISC contributors.
2: // Licensed under the Apache License, Version 2.0, see LICENSE for details.
3: // SPDX-License-Identifier: Apache-2.0
4:
5: /**
6: * Instruction cache
7: *
8: * Provides an instruction cache along with cache management, instruction buffering and prefetching
9: */
10:
11: `include "prim_assert.sv"
12:
13: module ibex_icache #(
14:   // Cache arrangement parameters
15:   parameter int unsigned BusWidth       = 32,
16:   parameter int unsigned CacheSizeBytes = 4*1024,
17:   parameter bit          ICacheECC      = 1'b0,
18:   parameter int unsigned LineSize       = 64,
19:   parameter int unsigned NumWays        = 2,
20:   // Always make speculative bus requests in parallel with lookups
21:   parameter bit          SpecRequest    = 1'b0,
22:   // Only cache branch targets
23:   parameter bit          BranchCache    = 1'b0
24: ) (
25:     // Clock and reset
26:     input  logic                clk_i,
27:     input  logic                rst_ni,
28:
29:     // Signal that the core would like instructions
30:     input  logic                req_i,
31:
32:     // Set the cache's address counter
33:     input  logic                branch_i,
34:     input  logic                branch_spec_i,
35:     input  logic [31:0]         addr_i,
36:
37:     // IF stage interface: Pass fetched instructions to the core
38:     input  logic                ready_i,
39:     output logic                valid_o,
40:     output logic [31:0]         rdata_o,
41:     output logic [31:0]         addr_o,
42:     output logic                err_o,
43:     output logic                err_plus2_o,
44:
45:     // Instruction memory / interconnect interface: Fetch instruction data from memory
46:     output logic                instr_req_o,
47:     input  logic                instr_gnt_i,
48:     output logic [31:0]         instr_addr_o,
49:     input  logic [BusWidth-1:0] instr_rdata_i,
50:     input  logic                instr_err_i,
51:     input  logic                instr_pmp_err_i,
52:     input  logic                instr_rvalid_i,
53:
54:     // Cache status
55:     input  logic                icache_enable_i,
56:     input  logic                icache_inval_i,
57:     output logic                busy_o
58: );
// NOTE(review): BusWidth and LineSize are in bits (LINE_SIZE_BYTES = LineSize/8 below);
// CacheSizeBytes is in bytes and NumWays is the set associativity.
59:
60:   // NOTE RTL IS DRAFT
61:
62:   // Local constants
63:   localparam int unsigned ADDR_W       = 32;
64:   // Number of fill buffers (must be >= 2)
65:   localparam int unsigned NUM_FB       = 4;
66:   // Request throttling threshold
67:   localparam int unsigned FB_THRESHOLD = NUM_FB - 2;
68:   // Derived parameters
//    LINE_SIZE_ECC / TAG_SIZE_ECC add the SECDED checkbit widths when ECC is enabled
//    (8 bits for the 72,64 data code, 6 bits for the 28,22 tag code).
69:   localparam int unsigned LINE_SIZE_ECC   = ICacheECC ? (LineSize + 8) : LineSize;
70:   localparam int unsigned LINE_SIZE_BYTES = LineSize/8;
71:   localparam int unsigned LINE_W          = $clog2(LINE_SIZE_BYTES);
72:   localparam int unsigned BUS_BYTES       = BusWidth/8;
73:   localparam int unsigned BUS_W           = $clog2(BUS_BYTES);
//    LINE_BEATS = number of bus transfers needed to fill one cache line
74:   localparam int unsigned LINE_BEATS      = LINE_SIZE_BYTES / BUS_BYTES;
75:   localparam int unsigned LINE_BEATS_W    = $clog2(LINE_BEATS);
76:   localparam int unsigned NUM_LINES       = CacheSizeBytes / NumWays / LINE_SIZE_BYTES;
77:   localparam int unsigned INDEX_W         = $clog2(NUM_LINES);
78:   localparam int unsigned INDEX_HI        = INDEX_W + LINE_W - 1;
79:   localparam int unsigned TAG_SIZE        = ADDR_W - INDEX_W - LINE_W + 1; // 1 valid bit
80:   localparam int unsigned TAG_SIZE_ECC    = ICacheECC ? (TAG_SIZE + 6) : TAG_SIZE;
81:   localparam int unsigned OUTPUT_BEATS    = (BUS_BYTES / 2); // number of halfwords
82:
83:   // Prefetch signals
84:   logic [ADDR_W-1:0]                      lookup_addr_aligned;
85:   logic [ADDR_W-1:0]                      prefetch_addr_d, prefetch_addr_q;
86:   logic                                   prefetch_addr_en;
87:   // Cache pipeline IC0 signals
88:   logic                                   branch_suppress;
89:   logic                                   lookup_throttle;
90:   logic                                   lookup_req_ic0;
91:   logic [ADDR_W-1:0]                      lookup_addr_ic0;
92:   logic [INDEX_W-1:0]                     lookup_index_ic0;
93:   logic                                   fill_req_ic0;
94:   logic [INDEX_W-1:0]                     fill_index_ic0;
95:   logic [TAG_SIZE-1:0]                    fill_tag_ic0;
96:   logic [LineSize-1:0]                    fill_wdata_ic0;
97:   logic                                   lookup_grant_ic0;
98:   logic                                   lookup_actual_ic0;
99:   logic                                   fill_grant_ic0;
100:   logic                                   tag_req_ic0;
101:   logic [INDEX_W-1:0]                     tag_index_ic0;
102:   logic [NumWays-1:0]                     tag_banks_ic0;
103:   logic                                   tag_write_ic0;
104:   logic [TAG_SIZE_ECC-1:0]                tag_wdata_ic0;
105:   logic                                   data_req_ic0;
106:   logic [INDEX_W-1:0]                     data_index_ic0;
107:   logic [NumWays-1:0]                     data_banks_ic0;
108:   logic                                   data_write_ic0;
109:   logic [LINE_SIZE_ECC-1:0]               data_wdata_ic0;
110:   // Cache pipeline IC1 signals
111:   logic [TAG_SIZE_ECC-1:0]                tag_rdata_ic1  [NumWays];
112:   logic [LINE_SIZE_ECC-1:0]               data_rdata_ic1 [NumWays];
113:   logic [LINE_SIZE_ECC-1:0]               hit_data_ic1;
114:   logic                                   lookup_valid_ic1;
115:   logic [ADDR_W-1:INDEX_HI+1]             lookup_addr_ic1;
116:   logic [NumWays-1:0]                     tag_match_ic1;
117:   logic                                   tag_hit_ic1;
118:   logic [NumWays-1:0]                     tag_invalid_ic1;
119:   logic [NumWays-1:0]                     lowest_invalid_way_ic1;
120:   logic [NumWays-1:0]                     round_robin_way_ic1, round_robin_way_q;
121:   logic [NumWays-1:0]                     sel_way_ic1;
122:   logic                                   ecc_err_ic1;
123:   logic                                   ecc_write_req;
124:   logic [NumWays-1:0]                     ecc_write_ways;
125:   logic [INDEX_W-1:0]                     ecc_write_index;
126:   // Fill buffer signals
127:   logic                                   gnt_or_pmp_err, gnt_not_pmp_err;
128:   logic [$clog2(NUM_FB)-1:0]              fb_fill_level;
129:   logic                                   fill_cache_new;
130:   logic                                   fill_new_alloc;
131:   logic                                   fill_spec_req, fill_spec_done, fill_spec_hold;
132:   logic [NUM_FB-1:0][NUM_FB-1:0]          fill_older_d, fill_older_q;
133:   logic [NUM_FB-1:0]                      fill_alloc_sel, fill_alloc;
134:   logic [NUM_FB-1:0]                      fill_busy_d, fill_busy_q;
135:   logic [NUM_FB-1:0]                      fill_done;
136:   logic [NUM_FB-1:0]                      fill_in_ic1;
137:   logic [NUM_FB-1:0]                      fill_stale_d, fill_stale_q;
138:   logic [NUM_FB-1:0]                      fill_cache_d, fill_cache_q;
139:   logic [NUM_FB-1:0]                      fill_hit_ic1, fill_hit_d, fill_hit_q;
140:   logic [NUM_FB-1:0][LINE_BEATS_W:0]      fill_ext_cnt_d, fill_ext_cnt_q;
141:   logic [NUM_FB-1:0]                      fill_ext_hold_d, fill_ext_hold_q;
142:   logic [NUM_FB-1:0]                      fill_ext_done;
143:   logic [NUM_FB-1:0][LINE_BEATS_W:0]      fill_rvd_cnt_d, fill_rvd_cnt_q;
144:   logic [NUM_FB-1:0]                      fill_rvd_done;
145:   logic [NUM_FB-1:0]                      fill_ram_done_d, fill_ram_done_q;
146:   logic [NUM_FB-1:0]                      fill_out_grant;
147:   logic [NUM_FB-1:0][LINE_BEATS_W:0]      fill_out_cnt_d, fill_out_cnt_q;
148:   logic [NUM_FB-1:0]                      fill_out_done;
149:   logic [NUM_FB-1:0]                      fill_ext_req, fill_rvd_exp, fill_ram_req, fill_out_req;
150:   logic [NUM_FB-1:0]                      fill_data_sel, fill_data_reg, fill_data_hit, fill_data_rvd;
151:   logic [NUM_FB-1:0][LINE_BEATS_W-1:0]    fill_ext_off, fill_rvd_off;
152:   logic [NUM_FB-1:0][LINE_BEATS_W:0]      fill_rvd_beat;
153:   logic [NUM_FB-1:0]                      fill_ext_arb, fill_ram_arb, fill_out_arb;
154:   logic [NUM_FB-1:0]                      fill_rvd_arb;
155:   logic [NUM_FB-1:0]                      fill_entry_en;
156:   logic [NUM_FB-1:0]                      fill_addr_en;
157:   logic [NUM_FB-1:0]                      fill_way_en;
158:   logic [NUM_FB-1:0][LINE_BEATS-1:0]      fill_data_en;
159:   logic [NUM_FB-1:0][LINE_BEATS-1:0]      fill_err_d, fill_err_q;
160:   logic [ADDR_W-1:0]                      fill_addr_q [NUM_FB];
161:   logic [NumWays-1:0]                     fill_way_q  [NUM_FB];
162:   logic [LineSize-1:0]                    fill_data_d [NUM_FB];
163:   logic [LineSize-1:0]                    fill_data_q [NUM_FB];
164:   logic [ADDR_W-1:BUS_W]                  fill_ext_req_addr;
165:   logic [ADDR_W-1:0]                      fill_ram_req_addr;
166:   logic [NumWays-1:0]                     fill_ram_req_way;
167:   logic [LineSize-1:0]                    fill_ram_req_data;
168:   logic [LineSize-1:0]                    fill_out_data;
169:   logic [LINE_BEATS-1:0]                  fill_out_err;
170:   // External req signals
171:   logic                                   instr_req;
172:   logic [ADDR_W-1:BUS_W]                  instr_addr;
173:   // Data output signals
174:   logic                                   skid_complete_instr;
175:   logic                                   skid_ready;
176:   logic                                   output_compressed;
177:   logic                                   skid_valid_d, skid_valid_q, skid_en;
178:   logic [15:0]                            skid_data_d, skid_data_q;
179:   logic                                   skid_err_q;
180:   logic                                   output_valid;
181:   logic                                   addr_incr_two;
182:   logic                                   output_addr_en;
183:   logic [ADDR_W-1:1]                      output_addr_d, output_addr_q;
184:   logic [15:0]                            output_data_lo, output_data_hi;
185:   logic                                   data_valid, output_ready;
186:   logic [LineSize-1:0]                    line_data;
187:   logic [LINE_BEATS-1:0]                  line_err;
188:   logic [31:0]                            line_data_muxed;
189:   logic                                   line_err_muxed;
190:   logic [31:0]                            output_data;
191:   logic                                   output_err;
192:   // Invalidations
193:   logic                                   start_inval, inval_done;
194:   logic                                   reset_inval_q;
195:   logic                                   inval_prog_d, inval_prog_q;
196:   logic [INDEX_W-1:0]                     inval_index_d, inval_index_q;
197:
198:   //////////////////////////
199:   // Instruction prefetch //
200:   //////////////////////////
201:
// Align the current lookup address down to a cache-line boundary (clear LINE_W low bits).
202:   assign lookup_addr_aligned = {lookup_addr_ic0[ADDR_W-1:LINE_W],{LINE_W{1'b0}}};
203:
204:   // The prefetch address increments by one cache line for each granted request.
205:   // This address is also updated if there is a branch that is not granted, since the target
206:   // address (addr_i) is only valid for one cycle while branch_i is high.
207:
208:   // The captured branch target address is not forced to be aligned since the offset in the cache
209:   // line must also be recorded for later use by the fill buffers.
210:   assign prefetch_addr_d     =
211:       lookup_grant_ic0 ? (lookup_addr_aligned + {{ADDR_W-LINE_W-1{1'b0}},1'b1,{LINE_W{1'b0}}}) :
212:       addr_i;
213:
214:   assign prefetch_addr_en    = branch_i | lookup_grant_ic0;
215:
// NOTE(review): datapath register with no reset - presumably relies on prefetch_addr_en /
// control qualification before first use; confirm against lint/reset policy.
216:   always_ff @(posedge clk_i) begin
217:     if (prefetch_addr_en) begin
218:       prefetch_addr_q <= prefetch_addr_d;
219:     end
220:   end
221:
222:   ////////////////////////
223:   // Pipeline stage IC0 //
224:   ////////////////////////
225:
226:   // Cache lookup
// Throttle new lookups once more than FB_THRESHOLD fill buffers hold live (non-stale) requests.
227:   assign lookup_throttle  = (fb_fill_level > FB_THRESHOLD[$clog2(NUM_FB)-1:0]);
228:
// A lookup needs: a core request, a free fill buffer, no throttling (branches override the
// throttle), and no pending ECC correction write.
229:   assign lookup_req_ic0   = req_i & ~&fill_busy_q & (branch_i | ~lookup_throttle) & ~ecc_write_req;
230:   assign lookup_addr_ic0  = branch_spec_i ? addr_i :
231:                             prefetch_addr_q;
232:   assign lookup_index_ic0 = lookup_addr_ic0[INDEX_HI:LINE_W];
233:
234:   // Cache write
235:   assign fill_req_ic0   = (|fill_ram_req);
236:   assign fill_index_ic0 = fill_ram_req_addr[INDEX_HI:LINE_W];
// Tag MSB is the valid bit; it is written as 0 while an invalidation or ECC correction is in
// progress (so those writes invalidate the line).
237:   assign fill_tag_ic0   = {(~inval_prog_q & ~ecc_write_req),fill_ram_req_addr[ADDR_W-1:INDEX_HI+1]};
238:   assign fill_wdata_ic0 = fill_ram_req_data;
239:
240:   // Suppress a new lookup on a not-taken branch (as the address will be incorrect)
241:   assign branch_suppress  = branch_spec_i & ~branch_i;
242:
243:   // Arbitrated signals - lookups have highest priority
244:   assign lookup_grant_ic0 = lookup_req_ic0 & ~branch_suppress;
245:   assign fill_grant_ic0   = fill_req_ic0 & (~lookup_req_ic0 | branch_suppress) & ~inval_prog_q &
246:                             ~ecc_write_req;
247:   // Qualified lookup grant to mask ram signals in IC1 if access was not made
248:   assign lookup_actual_ic0 = lookup_grant_ic0 & icache_enable_i & ~inval_prog_q & ~start_inval;
249:
250:   // Tagram
251:   assign tag_req_ic0   = lookup_req_ic0 | fill_req_ic0 | inval_prog_q | ecc_write_req;
// Priority mux for the tag RAM address: invalidation sweep, then ECC correction,
// then fill write, then lookup read.
252:   assign tag_index_ic0 = inval_prog_q   ? inval_index_q :
253:                          ecc_write_req  ? ecc_write_index :
254:                          fill_grant_ic0 ? fill_index_ic0 :
255:                                           lookup_index_ic0;
// Lookups read all ways in parallel; writes target only the selected way(s).
256:   assign tag_banks_ic0 = ecc_write_req  ? ecc_write_ways :
257:                          fill_grant_ic0 ? fill_ram_req_way :
258:                                           {NumWays{1'b1}};
259:   assign tag_write_ic0 = fill_grant_ic0 | inval_prog_q | ecc_write_req;
260:
261:   // Dataram
262:   assign data_req_ic0   = lookup_req_ic0 | fill_req_ic0;
263:   assign data_index_ic0 = tag_index_ic0;
264:   assign data_banks_ic0 = tag_banks_ic0;
265:   assign data_write_ic0 = tag_write_ic0;
266:
267:   // Append ECC checkbits to write data if required
268:   if (ICacheECC) begin : gen_ecc_wdata
269:
270:     // Tagram ECC
271:     // Reuse the same ecc encoding module for larger cache sizes by padding with zeros
272:     logic [21:0]          tag_ecc_input_padded;
273:     logic [27:0]          tag_ecc_output_padded;
274:     logic [22-TAG_SIZE:0] tag_ecc_output_unused;
275:
276:     assign tag_ecc_input_padded  = {{22-TAG_SIZE{1'b0}},fill_tag_ic0};
277:     assign tag_ecc_output_unused = tag_ecc_output_padded[21:TAG_SIZE-1];
278:
279:     prim_secded_28_22_enc tag_ecc_enc (
280:       .in  (tag_ecc_input_padded),
281:       .out (tag_ecc_output_padded)
282:     );
283:
// Write data = 6 checkbits concatenated with the unpadded tag.
284:     assign tag_wdata_ic0 = {tag_ecc_output_padded[27:22],tag_ecc_output_padded[TAG_SIZE-1:0]};
285:
286:     // Dataram ECC
287:     prim_secded_72_64_enc data_ecc_enc (
288:       .in  (fill_wdata_ic0),
289:       .out (data_wdata_ic0)
290:     );
291:
292:   end else begin : gen_noecc_wdata
293:     assign tag_wdata_ic0  = fill_tag_ic0;
294:     assign data_wdata_ic0 = fill_wdata_ic0;
295:   end
296:
297:   ////////////////
298:   // IC0 -> IC1 //
299:   ////////////////
300:
// One tag bank and one data bank per way; all banks share the IC0 address/write signals,
// per-way enables come from tag_banks_ic0 / data_banks_ic0.
301:   for (genvar way = 0; way < NumWays; way++) begin : gen_rams
302:     // Tag RAM instantiation
303:     prim_ram_1p #(
304:       .Width           (TAG_SIZE_ECC),
305:       .Depth           (NUM_LINES),
306:       .DataBitsPerMask (TAG_SIZE_ECC)
307:     ) tag_bank (
308:       .clk_i    (clk_i),
309:       .req_i    (tag_req_ic0 & tag_banks_ic0[way]),
310:       .write_i  (tag_write_ic0),
311:       .wmask_i  ({TAG_SIZE_ECC{1'b1}}),
312:       .addr_i   (tag_index_ic0),
313:       .wdata_i  (tag_wdata_ic0),
314:       .rdata_o  (tag_rdata_ic1[way])
315:     );
316:     // Data RAM instantiation
317:     prim_ram_1p #(
318:       .Width           (LINE_SIZE_ECC),
319:       .Depth           (NUM_LINES),
320:       .DataBitsPerMask (LINE_SIZE_ECC)
321:     ) data_bank (
322:       .clk_i    (clk_i),
323:       .req_i    (data_req_ic0 & data_banks_ic0[way]),
324:       .write_i  (data_write_ic0),
325:       .wmask_i  ({LINE_SIZE_ECC{1'b1}}),
326:       .addr_i   (data_index_ic0),
327:       .wdata_i  (data_wdata_ic0),
328:       .rdata_o  (data_rdata_ic1[way])
329:     );
330:   end
331:
// Control pipeline register: only the valid flag is reset.
332:   always_ff @(posedge clk_i or negedge rst_ni) begin
333:     if (!rst_ni) begin
334:       lookup_valid_ic1 <= 1'b0;
335:     end else begin
336:       lookup_valid_ic1 <= lookup_actual_ic0;
337:     end
338:   end
339:
// Datapath pipeline registers (no reset): tag-compare address and the one-hot ID of the
// fill buffer allocated for this lookup.
340:   always_ff @(posedge clk_i) begin
341:     if (lookup_grant_ic0) begin
342:       lookup_addr_ic1  <= lookup_addr_ic0[ADDR_W-1:INDEX_HI+1];
343:       fill_in_ic1      <= fill_alloc_sel;
344:     end
345:   end
346:
347:   ////////////////////////
348:   // Pipeline stage IC1 //
349:   ////////////////////////
350:
351:   // Tag matching
// A way matches when its stored {valid,tag} equals {1, lookup tag}.
352:   for (genvar way = 0; way < NumWays; way++) begin : gen_tag_match
353:     assign tag_match_ic1[way]   = (tag_rdata_ic1[way][TAG_SIZE-1:0] ==
354:                                    {1'b1,lookup_addr_ic1[ADDR_W-1:INDEX_HI+1]});
355:     assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][TAG_SIZE-1];
356:   end
357:
358:   assign tag_hit_ic1 = |tag_match_ic1;
359:
359:   // Hit data mux
// OR-mux: tag_match_ic1 is expected to be one-hot, so ORing the matching ways selects the
// hit way's data without a priority chain.
361:   always_comb begin
362:     hit_data_ic1 = 'b0;
363:     for (int way = 0; way < NumWays; way++) begin
364:       if (tag_match_ic1[way]) begin
365:         hit_data_ic1 |= data_rdata_ic1[way];
366:       end
367:     end
368:   end
369:
370:   // Way selection for allocations to the cache (onehot signals)
371:   // 1 first invalid way
372:   // 2 global round-robin (pseudorandom) way
373:   assign lowest_invalid_way_ic1[0] = tag_invalid_ic1[0];
374:   assign round_robin_way_ic1[0]    = round_robin_way_q[NumWays-1];
375:   for (genvar way = 1; way < NumWays; way++) begin : gen_lowest_way
376:     assign lowest_invalid_way_ic1[way] = tag_invalid_ic1[way] & ~|tag_invalid_ic1[way-1:0];
377:     assign round_robin_way_ic1[way]    = round_robin_way_q[way-1];
378:   end
379:
// One-hot rotating register, advanced on every valid lookup; seeded with way 0 at reset.
380:   always_ff @(posedge clk_i or negedge rst_ni) begin
381:     if (!rst_ni) begin
382:       round_robin_way_q <= {{NumWays-1{1'b0}},1'b1};
383:     end else if (lookup_valid_ic1) begin
384:       round_robin_way_q <= round_robin_way_ic1;
385:     end
386:   end
387:
388:   assign sel_way_ic1 = |tag_invalid_ic1 ? lowest_invalid_way_ic1 :
389:                        round_robin_way_q;
390:
391:   // ECC checking logic
392:   if (ICacheECC) begin : gen_data_ecc_checking
393:     logic [NumWays-1:0]  tag_err_ic1;
394:     logic [1:0]          data_err_ic1;
395:     logic                ecc_correction_write_d, ecc_correction_write_q;
396:     logic [NumWays-1:0]  ecc_correction_ways_d, ecc_correction_ways_q;
397:     logic [INDEX_W-1:0]  lookup_index_ic1, ecc_correction_index_q;
398:
399:     // Tag ECC checking
400:     for (genvar way = 0; way < NumWays; way++) begin : gen_tag_ecc
401:       logic [1:0]  tag_err_bank_ic1;
402:       logic [27:0] tag_rdata_padded_ic1;
403:
404:       // Expand the tag rdata with extra padding if the tag size is less than the maximum
405:       assign tag_rdata_padded_ic1 = {tag_rdata_ic1[way][TAG_SIZE_ECC-1-:6],
406:                                      {22-TAG_SIZE{1'b0}},
407:                                      tag_rdata_ic1[way][TAG_SIZE-1:0]};
408:
409:       prim_secded_28_22_dec data_ecc_dec (
410:         .in         (tag_rdata_padded_ic1),
411:         .d_o        (),
412:         .syndrome_o (),
413:         .err_o      (tag_err_bank_ic1)
414:       );
// err_o is {double-bit, single-bit}; any set bit counts as a tag error for this way.
415:       assign tag_err_ic1[way] = |tag_err_bank_ic1;
416:     end
417:
418:     // Data ECC checking
419:     // Note - could generate for all ways and mux after
420:     prim_secded_72_64_dec data_ecc_dec (
421:       .in         (hit_data_ic1),
422:       .d_o        (),
423:       .syndrome_o (),
424:       .err_o      (data_err_ic1)
425:     );
426:
427:     assign ecc_err_ic1 = lookup_valid_ic1 & ((|data_err_ic1) | (|tag_err_ic1));
428:
429:     // Error correction
430:     // The way(s) producing the error will be invalidated in the next cycle.
431:     assign ecc_correction_ways_d  = tag_err_ic1 | (tag_match_ic1 & {NumWays{|data_err_ic1}});
432:     assign ecc_correction_write_d = ecc_err_ic1;
433:
434:     always_ff @(posedge clk_i or negedge rst_ni) begin
435:       if (!rst_ni) begin
436:         ecc_correction_write_q <= 1'b0;
437:       end else begin
438:         ecc_correction_write_q <= ecc_correction_write_d;
439:       end
440:     end
441:
442:     // The index is required in IC1 only when ECC is configured so is registered here
443:     always_ff @(posedge clk_i) begin
444:       if (lookup_grant_ic0) begin
445:         lookup_index_ic1 <= lookup_addr_ic0[INDEX_HI-:INDEX_W];
446:       end
447:     end
448:
449:     // Store the ways with errors to be invalidated
450:     always_ff @(posedge clk_i) begin
451:       if (ecc_err_ic1) begin
452:         ecc_correction_ways_q  <= ecc_correction_ways_d;
453:         ecc_correction_index_q <= lookup_index_ic1;
454:       end
455:     end
456:
457:     assign ecc_write_req   = ecc_correction_write_q;
458:     assign ecc_write_ways  = ecc_correction_ways_q;
459:     assign ecc_write_index = ecc_correction_index_q;
460:
461:   end else begin : gen_no_data_ecc
462:     assign ecc_err_ic1     = 1'b0;
463:     assign ecc_write_req   = 1'b0;
464:     assign ecc_write_ways  = '0;
465:     assign ecc_write_index = '0;
466:   end
467:
468:   ///////////////////////////////
469:   // Cache allocation decision //
470:   ///////////////////////////////
471:
472:   if (BranchCache) begin : gen_caching_logic
473:
474:     // Cache branch target + a number of subsequent lines
475:     localparam int unsigned CACHE_AHEAD = 2;
476:     localparam int unsigned CACHE_CNT_W = (CACHE_AHEAD == 1) ? 1 : $clog2(CACHE_AHEAD) + 1;
477:     logic                   cache_cnt_dec;
478:     logic [CACHE_CNT_W-1:0] cache_cnt_d, cache_cnt_q;
479:
// Counter reloads to CACHE_AHEAD on a branch and decrements once per granted lookup;
// while nonzero, new fetches are marked cacheable.
480:     assign cache_cnt_dec = lookup_grant_ic0 & (|cache_cnt_q);
481:     assign cache_cnt_d   = branch_i ? CACHE_AHEAD[CACHE_CNT_W-1:0] :
482:                                       (cache_cnt_q - {{CACHE_CNT_W-1{1'b0}},cache_cnt_dec});
483:
484:     always_ff @(posedge clk_i or negedge rst_ni) begin
485:       if (!rst_ni) begin
486:         cache_cnt_q <= '0;
487:       end else begin
488:         cache_cnt_q <= cache_cnt_d;
489:       end
490:     end
491:
492:     assign fill_cache_new = (branch_i | (|cache_cnt_q)) & icache_enable_i &
493:                             ~icache_inval_i & ~inval_prog_q;
494:
495:   end else begin : gen_cache_all
496:
497:     // Cache all missing fetches
498:     assign fill_cache_new = icache_enable_i & ~start_inval & ~inval_prog_q;
499:   end
500:
501:   //////////////////////////
502:   // Fill buffer tracking //
503:   //////////////////////////
504:
// Population count of busy, non-stale fill buffers; feeds lookup_throttle in IC0.
505:   always_comb begin
506:     fb_fill_level = '0;
507:     for (int i = 0; i < NUM_FB; i++) begin
508:       if (fill_busy_q[i] & ~fill_stale_q[i]) begin
509:         fb_fill_level += {{$clog2(NUM_FB)-1{1'b0}},1'b1};
510:       end
511:     end
512:   end
513:
514:   // PMP errors might not / don't need to be granted (since the external request is masked)
515:   assign gnt_or_pmp_err  = instr_gnt_i | instr_pmp_err_i;
516:   assign gnt_not_pmp_err = instr_gnt_i & ~instr_pmp_err_i;
517:   // Allocate a new buffer for every granted lookup
518:   assign fill_new_alloc = lookup_grant_ic0;
519:   // Track whether a speculative external request was made from IC0, and whether it was granted
// Speculative IC0 requests only go out when no fill buffer is already driving the bus.
520:   assign fill_spec_req  = (SpecRequest | branch_i) & ~|fill_ext_req;
521:   assign fill_spec_done = fill_spec_req & gnt_not_pmp_err;
522:   assign fill_spec_hold = fill_spec_req & ~gnt_or_pmp_err;
523:
// Per-fill-buffer tracking logic. Each buffer independently tracks allocation, staleness,
// cacheability, hit status, external request/response counts, RAM write-back and data
// output to the IF stage; age-based one-hot arbitration orders the buffers.
524:   for (genvar fb = 0; fb < NUM_FB; fb++) begin : gen_fbs
525:
526:     /////////////////////////////
527:     // Fill buffer allocations //
528:     /////////////////////////////
529:
530:     // Allocate the lowest available buffer
531:     if (fb == 0) begin : gen_fb_zero
532:       assign fill_alloc_sel[fb] = ~fill_busy_q[fb];
533:     end else begin : gen_fb_rest
534:       assign fill_alloc_sel[fb] = ~fill_busy_q[fb] & (&fill_busy_q[fb-1:0]);
535:     end
536:
537:     assign fill_alloc[fb]  = fill_alloc_sel[fb] & fill_new_alloc;
538:     assign fill_busy_d[fb] = fill_alloc[fb] | (fill_busy_q[fb] & ~fill_done[fb]);
539:
540:     // Track which other fill buffers are older than this one (for age-based arbitration)
541:     // TODO sparsify
// On allocation, snapshot the currently-busy buffers as "older"; clear bits as they retire.
542:     assign fill_older_d[fb] = (fill_alloc[fb] ? fill_busy_q : fill_older_q[fb]) & ~fill_done;
543:
544:     // A fill buffer can release once all its actions are completed
545:     // all data written to the cache (unless hit or error)
546:     assign fill_done[fb] = (fill_ram_done_q[fb] | fill_hit_q[fb] | ~fill_cache_q[fb] |
547:                             (|fill_err_q[fb])) &
548:                            // all data output unless stale due to intervening branch
549:                            (fill_out_done[fb] | fill_stale_q[fb] | branch_i) &
550:                            // all external requests completed
551:                            fill_rvd_done[fb];
552:
553:     /////////////////////////////////
554:     // Fill buffer status tracking //
555:     /////////////////////////////////
556:
557:     // Track staleness (requests become stale when a branch intervenes)
558:     assign fill_stale_d[fb] = fill_busy_q[fb] & (branch_i | fill_stale_q[fb]);
559:     // Track whether or not this request should allocate to the cache
560:     // Any invalidation or disabling of the cache while the buffer is busy will stop allocation
561:     assign fill_cache_d[fb] = (fill_alloc[fb] & fill_cache_new) |
562:                               (fill_cache_q[fb] & fill_busy_q[fb] &
563:                                icache_enable_i & ~icache_inval_i);
564:     // Record whether the request hit in the cache
565:     assign fill_hit_ic1[fb] = lookup_valid_ic1 & fill_in_ic1[fb] & tag_hit_ic1;
// An ECC error on the hit data squashes the hit (the line will be refetched from memory).
566:     assign fill_hit_d[fb]   = (fill_hit_ic1[fb] & ~ecc_err_ic1) |
567:                               (fill_hit_q[fb] & fill_busy_q[fb]);
568:
569:     ///////////////////////////////////////////
570:     // Fill buffer external request tracking //
571:     ///////////////////////////////////////////
572:
573:     // Make an external request
574:     assign fill_ext_req[fb] = fill_busy_q[fb] & ~fill_ext_done[fb];
575:
576:     // Count the number of completed external requests (each line requires LINE_BEATS requests)
577:     // Don't count fake PMP error grants here since they will never receive an rvalid response
578:     assign fill_ext_cnt_d[fb] = fill_alloc[fb] ?
579:                                   {{LINE_BEATS_W{1'b0}},fill_spec_done} :
580:                                   (fill_ext_cnt_q[fb] + {{LINE_BEATS_W{1'b0}},
581:                                                          fill_ext_arb[fb] & gnt_not_pmp_err});
582:     // External request must be held until granted
583:     assign fill_ext_hold_d[fb] = (fill_alloc[fb] & fill_spec_hold) |
584:                                  (fill_ext_arb[fb] & ~gnt_or_pmp_err);
585:     // External requests are completed when the counter is filled or when the request is cancelled
// Counter MSB set means all LINE_BEATS requests have been issued.
586:     assign fill_ext_done[fb] = (fill_ext_cnt_q[fb][LINE_BEATS_W] |
587:                                 // external requests are considered complete if the request hit
588:                                 (fill_hit_ic1[fb] & ~ecc_err_ic1) | fill_hit_q[fb] |
589:                                 // external requests will stop once any PMP error is received
590:                                 fill_err_q[fb][fill_ext_off[fb]] |
591:                                 // cancel if the line is stale and won't be cached
592:                                 (~fill_cache_q[fb] & (branch_i | fill_stale_q[fb]))) &
593:                                // can't cancel while we are waiting for a grant on the bus
594:                                ~fill_ext_hold_q[fb];
595:     // Track whether this fill buffer expects to receive beats of data
596:     assign fill_rvd_exp[fb] = fill_busy_q[fb] & ~fill_rvd_done[fb];
597:     // Count the number of rvalid beats received
598:     assign fill_rvd_cnt_d[fb] = fill_alloc[fb] ? '0 :
599:                                 (fill_rvd_cnt_q[fb] +
600:                                  {{LINE_BEATS_W{1'b0}},fill_rvd_arb[fb]});
601:     // External data is complete when all issued external requests have received their data
602:     assign fill_rvd_done[fb] = fill_ext_done[fb] & (fill_rvd_cnt_q[fb] == fill_ext_cnt_q[fb]);
603:
604:     //////////////////////////////////////
605:     // Fill buffer data output tracking //
606:     //////////////////////////////////////
607:
608:     // Send data to the IF stage for requests that are not stale, have not completed their
609:     // data output, and have data available to send.
610:     // Data is available if:
611:     //  - The request hit in the cache
612:     //  - The current beat is an error (since a PMP error might not actually receive any data)
613:     //  - Buffered data is available (fill_rvd_cnt_q is ahead of fill_out_cnt_q)
614:     //  - Data is available from the bus this cycle (fill_rvd_arb)
615:     assign fill_out_req[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
616:                               (fill_hit_ic1[fb] | fill_hit_q[fb] |
617:                                (fill_err_q[fb][fill_out_cnt_q[fb][LINE_BEATS_W-1:0]]) |
618:                                (fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_rvd_arb[fb]);
619:
620:     // Calculate when a beat of data is output. Any ECC error squashes the output that cycle.
621:     assign fill_out_grant[fb] = fill_out_arb[fb] & output_ready & ~ecc_err_ic1;
622:
623:     // Count the beats of data output to the IF stage
// Output count starts at the requested beat offset within the line (branch into mid-line).
624:     assign fill_out_cnt_d[fb] = fill_alloc[fb] ? {1'b0,lookup_addr_ic0[LINE_W-1:BUS_W]} :
625:                                 (fill_out_cnt_q[fb] +
626:                                  {{LINE_BEATS_W{1'b0}},fill_out_grant[fb]});
627:     // Data output complete when the counter fills
628:     assign fill_out_done[fb] = fill_out_cnt_q[fb][LINE_BEATS_W];
629:
630:     //////////////////////////////////////
631:     // Fill buffer ram request tracking //
632:     //////////////////////////////////////
633:
634:     // make a fill request once all data beats received
635:     assign fill_ram_req[fb] = fill_busy_q[fb] & fill_rvd_cnt_q[fb][LINE_BEATS_W] &
636:                               // unless the request hit, was non-allocating or got an error
637:                               ~fill_hit_q[fb] & fill_cache_q[fb] & ~|fill_err_q[fb] &
638:                               // or the request was already completed
639:                               ~fill_ram_done_q[fb];
640:
641:     // Record when a cache allocation request has been completed
642:     assign fill_ram_done_d[fb] = fill_ram_arb[fb] | (fill_ram_done_q[fb] & fill_busy_q[fb]);
643:
644:     //////////////////////////////
645:     // Fill buffer line offsets //
646:     //////////////////////////////
647:
648:     // When we branch into the middle of a line, the output count will not start from zero. This
649:     // beat count is used to know which incoming rdata beats are relevant.
650:     assign fill_rvd_beat[fb] = {1'b0,fill_addr_q[fb][LINE_W-1:BUS_W]} +
651:                                fill_rvd_cnt_q[fb][LINE_BEATS_W:0];
652:     assign fill_ext_off[fb]  = fill_addr_q[fb][LINE_W-1:BUS_W] +
653:                                fill_ext_cnt_q[fb][LINE_BEATS_W-1:0];
654:     assign fill_rvd_off[fb]  = fill_rvd_beat[fb][LINE_BEATS_W-1:0];
655:
656:     /////////////////////////////
657:     // Fill buffer arbitration //
658:     /////////////////////////////
659:
660:     // Age based arbitration - all these signals are one-hot
661:     assign fill_ext_arb[fb] = fill_ext_req[fb] & ~|(fill_ext_req & fill_older_q[fb]);
662:     assign fill_ram_arb[fb] = fill_ram_req[fb] & fill_grant_ic0 & ~|(fill_ram_req & fill_older_q[fb]);
663:     // Calculate which fill buffer is the oldest one which still needs to output data to IF
664:     assign fill_data_sel[fb] = ~|(fill_busy_q & ~fill_out_done & ~fill_stale_q &
665:                                   fill_older_q[fb]);
666:     // Arbitrate the request which has data available to send, and is the oldest outstanding
667:     assign fill_out_arb[fb] = fill_out_req[fb] & fill_data_sel[fb];
668:     // Assign incoming rvalid data to the oldest fill buffer expecting it
669:     assign fill_rvd_arb[fb] = instr_rvalid_i & fill_rvd_exp[fb] & ~|(fill_rvd_exp & fill_older_q[fb]);
670:
671:     /////////////////////////////
672:     // Fill buffer data muxing //
673:     /////////////////////////////
674:
675:     // Output data muxing controls
676:     // 1. Select data from the fill buffer data register
677:     assign fill_data_reg[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] &
678:                                ~fill_out_done[fb] & fill_data_sel[fb] &
679:                                // The incoming data is already ahead of the output count
680:                                ((fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_hit_q[fb] |
681:                                 (|fill_err_q[fb]));
682:     // 2. Select IC1 hit data
683:     assign fill_data_hit[fb] = fill_busy_q[fb] & fill_hit_ic1[fb] & fill_data_sel[fb];
684:     // 3. Select incoming instr_rdata_i
685:     assign fill_data_rvd[fb] = fill_busy_q[fb] & fill_rvd_arb[fb] & ~fill_hit_q[fb] &
686:                                ~fill_hit_ic1[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
687:                                // The incoming data lines up with the output count
688:                                (fill_rvd_beat[fb] == fill_out_cnt_q[fb]) & fill_data_sel[fb];
689:
690:
691:     ///////////////////////////
692:     // Fill buffer registers //
693:     ///////////////////////////
694:
695:     // Fill buffer general enable
696:     assign fill_entry_en[fb] = fill_alloc[fb] | fill_busy_q[fb];
697:
698:     always_ff @(posedge clk_i or negedge rst_ni) begin
699:       if (!rst_ni) begin
700:         fill_busy_q[fb]     <= 1'b0;
701:         fill_older_q[fb]    <= '0;
702:         fill_stale_q[fb]    <= 1'b0;
703:         fill_cache_q[fb]    <= 1'b0;
704:         fill_hit_q[fb]      <= 1'b0;
705:         fill_ext_cnt_q[fb]  <= '0;
706:         fill_ext_hold_q[fb] <= 1'b0;
707:         fill_rvd_cnt_q[fb]  <= '0;
708:         fill_ram_done_q[fb] <= 1'b0;
709:         fill_out_cnt_q[fb]  <= '0;
710:       end else if (fill_entry_en[fb]) begin
711:         fill_busy_q[fb]     <= fill_busy_d[fb];
712:         fill_older_q[fb]    <= fill_older_d[fb];
713:         fill_stale_q[fb]    <= fill_stale_d[fb];
714:         fill_cache_q[fb]    <= fill_cache_d[fb];
715:         fill_hit_q[fb]      <= fill_hit_d[fb];
716:         fill_ext_cnt_q[fb]  <= fill_ext_cnt_d[fb];
717:         fill_ext_hold_q[fb] <= fill_ext_hold_d[fb];
718:         fill_rvd_cnt_q[fb]  <= fill_rvd_cnt_d[fb];
719:         fill_ram_done_q[fb] <= fill_ram_done_d[fb];
720:         fill_out_cnt_q[fb]  <= fill_out_cnt_d[fb];
721:       end
722:     end
723:
724:     ////////////////////////////////////////
725:     // Fill buffer address / data storage //
726:     ////////////////////////////////////////
727:
728:     assign fill_addr_en[fb] = fill_alloc[fb];
729:     assign fill_way_en[fb]  = (lookup_valid_ic1 & fill_in_ic1[fb]);
730:
731:     always_ff @(posedge clk_i) begin
732:       if (fill_addr_en[fb]) begin
733:         fill_addr_q[fb] <= lookup_addr_ic0;
734:       end
735:     end
736:
737:     always_ff @(posedge clk_i) begin
738:       if (fill_way_en[fb]) begin
739:         fill_way_q[fb] <= sel_way_ic1;
740:       end
741:     end
742:
743:     // Data either comes from the cache or the bus. If there was an ECC error, we must take
744:     // the incoming bus data since the cache hit data is corrupted.
745:     assign fill_data_d[fb] = (fill_hit_ic1[fb] & ~ecc_err_ic1) ? hit_data_ic1[LineSize-1:0] :
746:                                                                  {LINE_BEATS{instr_rdata_i}};
747:
748:     for (genvar b = 0; b < LINE_BEATS; b++) begin : gen_data_buf
749:       // Error tracking (per beat)
750:       // Either a PMP error on a speculative request,
751:       assign fill_err_d[fb][b] = (instr_pmp_err_i & fill_alloc[fb] & fill_spec_req &
752:                                   (lookup_addr_ic0[LINE_W-1:BUS_W] == b[LINE_BEATS_W-1:0])) |
753:                                  // a PMP error on a fill buffer ext req
754:                                  (instr_pmp_err_i & fill_ext_arb[fb] &
755:                                   (fill_ext_off[fb] == b[LINE_BEATS_W-1:0])) |
756:                                  // Or a data error with instr_rvalid_i
757:                                  (fill_rvd_arb[fb] & instr_err_i &
758:                                   (fill_rvd_off[fb] == b[LINE_BEATS_W-1:0])) |
759:                                  // Hold the error once recorded
760:                                  (fill_busy_q[fb] & fill_err_q[fb][b]);
761:
762:       always_ff @(posedge clk_i or negedge rst_ni) begin
763:         if (!rst_ni) begin
764:           fill_err_q[fb][b] <= '0;
765:         end else if (fill_entry_en[fb]) begin
766:           fill_err_q[fb][b] <= fill_err_d[fb][b];
767:         end
768:       end
769:
770:       // Enable the relevant part of the data register (or all for cache hits)
771:       // Ignore incoming rvalid data when we already have cache hit data
772:       assign fill_data_en[fb][b] = fill_hit_ic1[fb] |
773:                                    (fill_rvd_arb[fb] & ~fill_hit_q[fb] &
774:                                     (fill_rvd_off[fb] == b[LINE_BEATS_W-1:0]));
775:
776:       always_ff @(posedge clk_i) begin
777:         if (fill_data_en[fb][b]) begin
778:           fill_data_q[fb][b*BusWidth+:BusWidth] <= fill_data_d[fb][b*BusWidth+:BusWidth];
779:         end
780:       end
781:
782:     end
783:   end
784:
785:   ////////////////////////////////
786:   // Fill buffer one-hot muxing //
787:   ////////////////////////////////
788:
// The arbitration signals indexing these muxes are one-hot, so OR-reduction selects a
// single buffer's fields without priority logic.
789:   // External req info
790:   always_comb begin
791:     fill_ext_req_addr = '0;
792:     for (int i = 0; i < NUM_FB; i++) begin
793:       if (fill_ext_arb[i]) begin
794:         fill_ext_req_addr |= {fill_addr_q[i][ADDR_W-1:LINE_W], fill_ext_off[i]};
795:       end
796:     end
797:   end
798:
799:   // Cache req info
800:   always_comb begin
801:     fill_ram_req_addr = '0;
802:     fill_ram_req_way  = '0;
803:     fill_ram_req_data = '0;
804:     for (int i = 0; i < NUM_FB; i++) begin
805:       if (fill_ram_arb[i]) begin
806:         fill_ram_req_addr |= fill_addr_q[i];
807:         fill_ram_req_way  |= fill_way_q[i];
808:         fill_ram_req_data |= fill_data_q[i];
809:       end
810:     end
811:   end
812:
813:   // IF stage output data
814:   always_comb begin
815:     fill_out_data = '0;
816:     fill_out_err  = '0;
817:     for (int i = 0; i < NUM_FB; i++) begin
818:       if (fill_data_reg[i]) begin
819:         fill_out_data |= fill_data_q[i];
820:         // Ignore any speculative errors accumulated on cache hits
821:         fill_out_err  |= (fill_err_q[i] & ~{LINE_BEATS{fill_hit_q[i]}});
822:       end
823:     end
824:   end
825:
826: ///////////////////////
827: // External requests //
828: ///////////////////////
829:
830: assign instr_req = ((SpecRequest | branch_i) & lookup_grant_ic0) |
831: |fill_ext_req;
832:
833: assign instr_addr = |fill_ext_req ? fill_ext_req_addr :
834: lookup_addr_ic0[ADDR_W-1:BUS_W];
835:
836: assign instr_req_o = instr_req;
837: assign instr_addr_o = {instr_addr[ADDR_W-1:BUS_W],{BUS_W{1'b0}}};
838:
839: ////////////////////////
840: // Output data muxing //
841: ////////////////////////
842:
843: // Mux between line-width data sources
844: assign line_data = |fill_data_hit ? hit_data_ic1[LineSize-1:0] : fill_out_data;
845: assign line_err = |fill_data_hit ? {LINE_BEATS{1'b0}} : fill_out_err;
846:
847: // Mux the relevant beat of line data, based on the output address
848: always_comb begin
849: line_data_muxed = '0;
850: line_err_muxed = 1'b0;
851: for (int i = 0; i < LINE_BEATS; i++) begin
852: // When data has been skidded, the output address is behind by one
853: if ((output_addr_q[LINE_W-1:BUS_W] + {{LINE_BEATS_W-1{1'b0}},skid_valid_q}) ==
854: i[LINE_BEATS_W-1:0]) begin
855: line_data_muxed |= line_data[i*32+:32];
856: line_err_muxed |= line_err[i];
857: end
858: end
859: end
860:
861: // Mux between incoming rdata and the muxed line data
862: assign output_data = |fill_data_rvd ? instr_rdata_i : line_data_muxed;
863: assign output_err = |fill_data_rvd ? instr_err_i : line_err_muxed;
864:
  // Output data is valid (from any of the three possible sources). Note that fill_out_arb
  // must be used here rather than fill_out_req because data can become valid out of order
  // (e.g. cache hit data can become available ahead of an older outstanding miss).
  // Any ECC error suppresses the output that cycle.
  assign data_valid = |fill_out_arb & ~ecc_err_ic1;

  // Skid buffer data
  // Capture the upper halfword of the output word; per the comments below it
  // holds the first half of an instruction that straddles a word boundary.
  assign skid_data_d = output_data[31:16];

  // Load the skid buffer when valid data is present and either the core
  // consumes the current output (ready_i) or the buffer can accept data.
  assign skid_en = data_valid & (ready_i | skid_ready);

  // No reset: skid_data_q / skid_err_q are qualified by skid_valid_q,
  // which is reset below.
  always_ff @(posedge clk_i) begin
    if (skid_en) begin
      skid_data_q <= skid_data_d;
      skid_err_q <= output_err;
    end
  end

  // The data in the skid buffer is ready if it's a complete compressed instruction or if there's
  // an error (no need to wait for the second half)
  assign skid_complete_instr = skid_valid_q & ((skid_data_q[1:0] != 2'b11) | skid_err_q);

  // Data can be loaded into the skid buffer for an unaligned uncompressed instruction
  assign skid_ready = output_addr_q[1] & ~skid_valid_q & (~output_compressed | output_err);

  // Consume output data when the core takes it or it can be skidded, unless a
  // compressed instruction is being satisfied entirely from the skid buffer.
  assign output_ready = (ready_i | skid_ready) & ~skid_complete_instr;

  // RISC-V encoding: an instruction is compressed iff its two LSBs != 2'b11
  assign output_compressed = (rdata_o[1:0] != 2'b11);

  assign skid_valid_d =
      // Branches invalidate the skid buffer
      branch_i ? 1'b0 :
      // Once valid, the skid buffer stays valid until a compressed instruction realigns the stream
      (skid_valid_q ? ~(ready_i & ((skid_data_q[1:0] != 2'b11) | skid_err_q)) :
      // The skid buffer becomes valid when:
      // - we branch to an unaligned uncompressed instruction
      (((output_addr_q[1] & (~output_compressed | output_err)) |
      // - a compressed instruction misaligns the stream
      (~output_addr_q[1] & output_compressed & ~output_err & ready_i)) & data_valid));
904:
  // Skid buffer validity flag; set/clear conditions are encoded in
  // skid_valid_d, with branch_i taking priority to flush the buffer.
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      skid_valid_q <= 1'b0;
    end else begin
      skid_valid_q <= skid_valid_d;
    end
  end
912:
  // Signal that valid data is available to the IF stage
  // Note that if the first half of an unaligned instruction reports an error, we do not need
  // to wait for the second half (and for PMP errors we might not have fetched the second half)
  // Compressed instruction completely satisfied by skid buffer
  assign output_valid = skid_complete_instr |
                        // Output data available and, output stream aligned, or skid data available,
                        (data_valid & (~output_addr_q[1] | skid_valid_q |
                        // or this is an error or an unaligned compressed instruction
                        output_err | (output_data[17:16] != 2'b11)));

  // Update the address on branches and every time an instruction is driven
  assign output_addr_en = branch_i | (ready_i & valid_o);

  // Increment the address by two every time a compressed instruction is popped
  // (on an error the address advances by four — NOTE(review): presumably the
  // core redirects with a branch after an error; confirm against the IF stage)
  assign addr_incr_two = output_compressed & ~err_o;

  // output_addr_q holds the halfword address (bits [31:1]); adding 2'b10 or
  // 2'b01 here therefore advances the byte address by 4 or 2 respectively.
  assign output_addr_d = branch_i ? addr_i[31:1] :
                         (output_addr_q[31:1] +
                          // Increment address by 4 or 2
                          {29'd0, ~addr_incr_two, addr_incr_two});

  // No reset — NOTE(review): output_addr_q is presumed to be initialised by a
  // branch (output_addr_en is set on branch_i) before it is first consumed.
  always_ff @(posedge clk_i) begin
    if (output_addr_en) begin
      output_addr_q <= output_addr_d;
    end
  end
939:
940: // Mux the data from BusWidth to halfword
941: // This muxing realigns data when instruction words are split across BUS_W e.g.
942: // word 1 |----|*h1*|
943: // word 0 |*h0*|----| --> |*h1*|*h0*|
944: // 31 15 0 31 15 0
945: always_comb begin
946: output_data_lo = '0;
947: for (int i = 0; i < OUTPUT_BEATS; i++) begin
948: if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
949: output_data_lo |= output_data[i*16+:16];
950: end
951: end
952: end
953:
954: always_comb begin
955: output_data_hi = '0;
956: for (int i = 0; i < OUTPUT_BEATS-1; i++) begin
957: if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
958: output_data_hi |= output_data[(i+1)*16+:16];
959: end
960: end
961: if (&output_addr_q[BUS_W-1:1]) begin
962: output_data_hi |= output_data[15:0];
963: end
964: end
965:
966: assign valid_o = output_valid;
967: assign rdata_o = {output_data_hi, (skid_valid_q ? skid_data_q : output_data_lo)};
968: assign addr_o = {output_addr_q, 1'b0};
969: assign err_o = (skid_valid_q & skid_err_q) | (~skid_complete_instr & output_err);
970: // Error caused by the second half of a misaligned uncompressed instruction
971: // (only relevant when err_o is set)
972: assign err_plus2_o = skid_valid_q & ~skid_err_q;
973:
974: ///////////////////
975: // Invalidations //
976: ///////////////////
977:
978: // Invalidate on reset, or when instructed. If an invalidation request is received while a
979: // previous invalidation is ongoing, it does not need to be restarted.
980: assign start_inval = (~reset_inval_q | icache_inval_i) & ~inval_prog_q;
981: assign inval_prog_d = start_inval | (inval_prog_q & ~inval_done);
982: assign inval_done = &inval_index_q;
983: assign inval_index_d = start_inval ? '0 :
984: (inval_index_q + {{INDEX_W-1{1'b0}},1'b1});
985:
986: always_ff @(posedge clk_i or negedge rst_ni) begin
987: if (!rst_ni) begin
988: inval_prog_q <= 1'b0;
989: reset_inval_q <= 1'b0;
990: end else begin
991: inval_prog_q <= inval_prog_d;
992: reset_inval_q <= 1'b1;
993: end
994: end
995:
996: always_ff @(posedge clk_i) begin
997: if (inval_prog_d) begin
998: inval_index_q <= inval_index_d;
999: end
1000: end
1001:
1002: /////////////////
1003: // Busy status //
1004: /////////////////
1005:
1006: // Only busy (for WFI purposes) while an invalidation is in-progress, or external requests are
1007: // outstanding.
1008: assign busy_o = inval_prog_q | (|(fill_busy_q & ~fill_rvd_done));
1009:
1010: ////////////////
1011: // Assertions //
1012: ////////////////
1013:
1014: `ASSERT_INIT(size_param_legal, (LineSize > 32))
1015:
1016: // ECC primitives will need to be changed for different sizes
1017: `ASSERT_INIT(ecc_tag_param_legal, (TAG_SIZE <= 27))
1018: `ASSERT_INIT(ecc_data_param_legal, (LineSize <= 121))
1019:
1020: endmodule
1021: