Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ sources:
- src/fpnew_divsqrt_th_32.sv
- src/fpnew_divsqrt_th_64_multi.sv
- src/fpnew_divsqrt_multi.sv
- vendor/cvw/fma/fmalza.sv
- src/fpnew_fma.sv
- src/fpnew_fma_multi.sv
- src/fpnew_noncomp.sv
Expand Down
55 changes: 42 additions & 13 deletions src/fpnew_fma_multi.sv
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,23 @@ module fpnew_fma_multi #(
? 1'b1
: (effective_subtraction ? 1'b0 : tentative_sign);

// ------
// Leading Zero Anticipator
// ------
logic [LZC_RESULT_WIDTH-1:0] lza_count;

fmalza #(
.WIDTH ( LOWER_SUM_WIDTH ),
.NF ( PRECISION_BITS-1 )
) i_fmalza (
.A ( addend_shifted[LOWER_SUM_WIDTH-1:0] ),
.Pm ( product ),
.Cin ( inject_carry_in ),
.sub ( effective_subtraction ),
.SCnt ( lza_count )
);


// ---------------
// Internal pipeline
// ---------------
Expand All @@ -531,6 +548,7 @@ module fpnew_fma_multi #(
logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q;
logic sticky_before_add_q;
logic [3*PRECISION_BITS+3:0] sum_q;
logic [LZC_RESULT_WIDTH-1:0] lza_count_q;
logic final_sign_q;
fpnew_pkg::fp_format_e dst_fmt_q2;
fpnew_pkg::roundmode_e rnd_mode_q;
Expand All @@ -545,6 +563,7 @@ module fpnew_fma_multi #(
logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q;
logic [0:NUM_MID_REGS] mid_pipe_sticky_q;
logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q;
logic [0:NUM_MID_REGS][LZC_RESULT_WIDTH-1:0] mid_pipe_lza_count_q;
logic [0:NUM_MID_REGS] mid_pipe_final_sign_q;
fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q;
fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q;
Expand All @@ -566,6 +585,7 @@ module fpnew_fma_multi #(
assign mid_pipe_add_shamt_q[0] = addend_shamt + addend_normalize_shamt;
assign mid_pipe_sticky_q[0] = sticky_before_add;
assign mid_pipe_sum_q[0] = sum;
assign mid_pipe_lza_count_q[0] = lza_count;
assign mid_pipe_final_sign_q[0] = final_sign;
assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS];
assign mid_pipe_dst_fmt_q[0] = dst_fmt_q;
Expand Down Expand Up @@ -599,6 +619,7 @@ module fpnew_fma_multi #(
`FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0)
`FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0)
`FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0)
`FFL(mid_pipe_lza_count_q[i+1], mid_pipe_lza_count_q[i], reg_ena, '0)
`FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0)
`FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE)
`FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0))
Expand All @@ -617,6 +638,7 @@ module fpnew_fma_multi #(
assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS];
assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS];
assign sum_q = mid_pipe_sum_q[NUM_MID_REGS];
assign lza_count_q = mid_pipe_lza_count_q[NUM_MID_REGS];
assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS];
assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS];
assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS];
Expand All @@ -630,7 +652,9 @@ module fpnew_fma_multi #(
logic [LOWER_SUM_WIDTH-1:0] sum_lower; // lower 2p+3 bits of sum are searched
logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes
logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count
logic lzc_zeroes; // in case only zeroes found
logic sum_lower_zero; // in case only zeroes found
logic [LOWER_SUM_WIDTH:0] least_leading_0_onehot; // onehot encoded least leading 0
logic lza_overpredict; // LZA over-predicted actual LZC by 1

logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount
logic signed [EXP_WIDTH-1:0] normalized_exponent;
Expand All @@ -644,32 +668,37 @@ module fpnew_fma_multi #(

assign sum_lower = sum_q[LOWER_SUM_WIDTH-1:0];

// Leading zero counter for cancellations
lzc #(
.WIDTH ( LOWER_SUM_WIDTH ),
.MODE ( 1 ) // MODE = 1 counts leading zeroes
) i_lzc (
.in_i ( sum_lower ),
.cnt_o ( leading_zero_count ),
.empty_o ( lzc_zeroes )
);
assign sum_lower_zero = sum_lower == '0;

// A carry might have propagated into the least leading 0 bit (the lowest 0 bit just before the
// first 1 bit) predicted by the LZA.
// Note: This is a mux that only looks at `sum_q[LOWER_SUM_WIDTH:0]`.
assign least_leading_0_onehot = {1'b1, {LOWER_SUM_WIDTH{1'b0}}} >> lza_count_q;
assign lza_overpredict = |(sum_q[LOWER_SUM_WIDTH:0] & least_leading_0_onehot);

// Get actual LZC by correcting LZA in case of over-prediction
assign leading_zero_count = lza_overpredict ? lza_count_q - 1 : lza_count_q;
assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count});

// Normalization shift amount based on exponents and LZC (unsigned as only left shifts)
always_comb begin : norm_shift_amount
// Product-anchored case or cancellations require LZC
if ((exponent_difference_q <= 0) || (effective_subtraction_q && (exponent_difference_q <= 2))) begin
// Normal result (biased exponent > 0 and not a zero)
if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !lzc_zeroes) begin
if ((exponent_product_q - signed'({1'b0, lza_count_q}) + 1 >= 0) && !sum_lower_zero) begin
// Undo initial product shift, remove the counted zeroes
norm_shamt = PRECISION_BITS + 2 + leading_zero_count;
normalized_exponent = exponent_product_q - leading_zero_count_sgn + 1; // account for shift
norm_shamt = PRECISION_BITS + 1 + lza_count_q;
normalized_exponent = exponent_product_q - signed'({1'b0, lza_count_q}) + 2; // account for shift
// Subnormal result
end else begin
// Cap the shift distance to align mantissa with minimum exponent
norm_shamt = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q));
normalized_exponent = 0; // subnormals encoded as 0
// Fix exponent in case of a normal number accidentally being classified as subnormal due
// to LZA over-prediction (LZA could be 1 larger than actual LZC)
if ((exponent_product_q - leading_zero_count_sgn + 1 >= 0) && !sum_lower_zero) begin
normalized_exponent = 1;
end
end
// Addend-anchored case
end else begin
Expand Down
68 changes: 68 additions & 0 deletions vendor/cvw/fma/fmalza.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
///////////////////////////////////////////
// fmalza.sv
//
// Written: 6/23/2021 me@KatherineParry.com, David_Harris@hmc.edu
// Modified:
//
// Purpose: Leading Zero Anticipator
//
// Documentation: RISC-V System on Chip Design Chapter 13 (Figure 13.14)
// See also [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////

// Leading zero anticipator for an FMA adder.
//
// Builds a per-column leading-digit indicator vector from the propagate/generate/kill signals of
// the two adder operands (aligned addend and product) and counts its leading zeroes with `lzc`.
//
// Parameters:
//   WIDTH - width of the aligned addend `A` and of the internally aligned product
//   NF    - sizes the product port: `Pm` is 2*NF+2 bits wide
//
// Ports:
//   A    - addend
//   Pm   - product
//   Cin  - carry in, injected below the least significant column
//   sub  - subtraction flag
//   SCnt - leading-zero count of the (WIDTH+1)-bit indicator vector, used as the normalization
//          shift count for the positive result
//
// NOTE(review): the FMA that instantiates this module corrects the count by one after the actual
// addition, i.e. it treats SCnt as a prediction rather than an exact count - confirm against
// Schmookler & Nowka before relying on tighter bounds.
module fmalza #(WIDTH, NF) (
  input  logic [WIDTH-1:0]           A,    // addend
  input  logic [2*NF+1:0]            Pm,   // product
  input  logic                       Cin,  // carry in
  input  logic                       sub,  // subtraction
  output logic [$clog2(WIDTH+1)-1:0] SCnt  // normalization shift count for the positive result
);

  logic [WIDTH-1:0] prod_aligned;               // product placed in the same columns as A
  logic [WIDTH-1:0] p_col, g_col, k_col;        // propagate, generate, kill for each column
  logic [WIDTH-1:0] p_above;                    // propagate of column i+1 (shifted right by 1)
  logic [WIDTH-1:0] g_below, k_below;           // generate/kill of column i-1 (shifted left by 1)
  logic [WIDTH-1:0] lead_if_prop, lead_if_nprop; // indicator terms selected by p_above
  logic [WIDTH:0]   lead_ind;                   // most significant bit indicates leading digit

  // Product with two zero bits appended below and NF+2 zero bits prepended above. The
  // concatenation is 3*NF+6 bits wide; the assignment resizes it to WIDTH bits.
  assign prod_aligned = {{(NF+2){1'b0}}, Pm, 2'b0};

  // Column-wise adder signals
  assign p_col = A ^ prod_aligned;
  assign g_col = A & prod_aligned;
  assign k_col = ~(A | prod_aligned);

  // Neighbouring-column views
  assign p_above = {sub, p_col[WIDTH-1:1]};     // subtract flag stands in above the top column
  assign g_below = {g_col[WIDTH-2:0], Cin};     // carry in acts as generate below column 0
  assign k_below = {k_col[WIDTH-2:0], ~Cin};    // absence of carry in acts as kill below column 0

  // Leading pattern function
  // - note: Schmookler01 uses the numbering system where 0 is the most significant bit
  assign lead_if_prop  = (g_col & ~k_below) | (k_col & ~g_below);
  assign lead_if_nprop = (k_col & ~k_below) | (g_col & ~g_below);

  assign lead_ind[WIDTH]     = ~sub & p_col[WIDTH-1];
  assign lead_ind[WIDTH-1:0] = (p_above & lead_if_prop) | (~p_above & lead_if_nprop);

  // Position of the first set indicator bit, counted from the top
  lzc #(
    .WIDTH ( WIDTH+1 ),
    .MODE  ( 1       ) // MODE = 1 counts leading zeroes
  ) i_lzc (
    .in_i    ( lead_ind     ),
    .cnt_o   ( SCnt         ),
    .empty_o ( /*unused*/   )
  );

endmodule