Reputation: 7
I use the `ieee_proposed` library and am trying to implement a Newton-Raphson refinement. The code compiles and simulates, but the output signal never gets a value. The same design works with `float32` from `ieee.float_pkg.all`. However, I want to compute in `float64` for better accuracy and then truncate the result to `float32`. Opening the Dataflow view of the design does not work, although the simulation itself runs. Here is the code and the testbench:
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library ieee_proposed;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee_proposed.math_utility_pkg.all;
use ieee_proposed.fixed_pkg.all;
use ieee_proposed.float_pkg.all;
-- rsqrt_refine: one Newton-Raphson refinement step for a reciprocal square
-- root estimate, y1 = y0 * (1.5 - 0.5 * x * y0 * y0).
-- Inputs and output are raw IEEE 754 single-precision words; the arithmetic
-- is carried out in double precision and rounded back to single precision.
entity rsqrt_refine is
  port (
    approx_in  : in  std_logic_vector(31 downto 0);  -- Approximation input (IEEE 754, 32 bits)
    input_in   : in  std_logic_vector(31 downto 0);  -- Original input value (IEEE 754, 32 bits)
    result_out : out std_logic_vector(31 downto 0)   -- Refined result (IEEE 754, 32 bits)
  );
end rsqrt_refine;

architecture Behavioral of rsqrt_refine is

  -- Constants for floating-point arithmetic in float64 (11 exponent bits,
  -- 52 fraction bits). They are built with to_float from a real value so the
  -- result carries the float64 index range (11 downto -52). A bare string
  -- literal handed to to_float64 takes its bounds from the index subtype
  -- (INTEGER) instead, which is not a float64 layout.
  constant ONE_POINT_FIVE : float64 := to_float(1.5, 11, 52);
  constant POINT_FIVE     : float64 := to_float(0.5, 11, 52);

  -- Round a float64 value to float32 and return it as a 32-bit
  -- std_logic_vector.
  -- to_slv is used rather than the type conversion std_logic_vector(f32):
  -- a type conversion keeps the operand's bounds (8 downto -23), and -23 is
  -- not a legal std_logic_vector index.
  function float64_to_float32_slv(f : float64) return std_logic_vector is
  begin
    return to_slv(to_float32(f));
  end function;

begin

  -- Purely combinational: re-evaluated whenever either input changes.
  process(approx_in, input_in)
    variable input_val : float64;
    variable refined   : float64;
    variable temp      : float64;
    variable approx    : float64;
  begin
    -- Step 1: interpret the 32-bit words as float32 (8 exponent bits,
    -- 23 fraction bits) and widen them to float64.
    -- to_float(..., 8, 23) re-indexes the vector as 8 downto -23. The plain
    -- type conversion UNRESOLVED_float(approx_in) would keep 31 downto 0,
    -- i.e. a format with a 31-bit exponent and no fraction.
    approx    := to_float64(to_float(std_ulogic_vector(approx_in), 8, 23));
    input_val := to_float64(to_float(std_ulogic_vector(input_in), 8, 23));

    -- Step 2: perform one Newton-Raphson refinement using float64.
    temp    := ONE_POINT_FIVE - POINT_FIVE * (input_val * (approx * approx));
    refined := approx * temp;

    -- Step 3: round back to float32 and drive the output port.
    result_out <= float64_to_float32_slv(refined);
  end process;

end Behavioral;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Testbench for rsqrt_refine: applies a single stimulus pair and checks that
-- the unit under test produces a fully defined output word.
entity rsqrt_refine_tb is
end rsqrt_refine_tb;

architecture Behavioral of rsqrt_refine_tb is

  -- Testbench signals
  signal approx_in  : std_logic_vector(31 downto 0);
  signal input_in   : std_logic_vector(31 downto 0);
  signal result_out : std_logic_vector(31 downto 0);

begin

  -- Instantiate the rsqrt_refine entity
  uut : entity work.rsqrt_refine
    port map (
      approx_in  => approx_in,
      input_in   => input_in,
      result_out => result_out
    );

  stimulus : process
  begin
    -- Set test inputs.
    -- approx_in = 0x5E3504F3: sqrt(2) * 2**61, about 3.26e18 in IEEE 754
    -- single precision. This is the reciprocal square root of input_in.
    approx_in <= "01011110001101010000010011110011";
    -- input_in = 0x02000001: (1 + 2**-23) * 2**-123, about 9.40e-38.
    input_in  <= "00000010000000000000000000000001";

    -- Wait for the combinational result to settle.
    wait for 10 ns;

    -- Fail loudly if the output never received a defined value instead of
    -- relying on manual waveform inspection.
    assert not is_x(result_out)
      report "rsqrt_refine_tb: result_out contains undefined bits"
      severity error;

    -- Stop the stimulus process; result_out stays available for inspection.
    wait;
  end process;

end Behavioral;
I had to make several changes to get a valid argument for `to_float32(x)`: I had to change it to `to_float32(UNRESOLVED_float(x))` to make the code compile.
Upvotes: -1
Views: 117
Reputation: 7
It worked using the IEEE library `float_pkg`. However, it uses a lot of resources: 21185 LUTs and 54 DSPs. Here is the working code. It only compiles with VHDL-2008.
library ieee;
use ieee.std_logic_1164.all;
use ieee.float_pkg.all;
-- Entity declaration
entity rsqrt_refine is
  port (
    approx  : in  std_logic_vector(31 downto 0);  -- Initial approximation input
    input   : in  std_logic_vector(31 downto 0);  -- Input value
    refined : out std_logic_vector(31 downto 0)   -- Refined reciprocal square root output
  );
end entity rsqrt_refine;

-- Two chained Newton-Raphson steps evaluated combinationally in double
-- precision; the result is converted back to single precision.
architecture Combinational of rsqrt_refine is

  constant factor : float64 := to_float(1.5, 11, 52);  -- 1.5 in double precision
  constant half   : float64 := to_float(0.5, 11, 52);  -- 0.5 in double precision

  -- One refinement step: y * (1.5 - 0.5 * x * y * y).
  function newton_step(y : float64; x : float64) return float64 is
  begin
    return y * (factor - half * x * (y * y));
  end function newton_step;

begin

  -- Re-evaluated on any change of either input.
  process(approx, input)
    variable y0      : float64;  -- initial estimate, widened to double
    variable x       : float64;  -- input value, widened to double
    variable y1      : float64;  -- estimate after the first step
    variable y2      : float64;  -- estimate after the second step
    variable y2_f32  : float32;  -- final estimate in single precision
  begin
    -- std_logic_vector -> 8/23 float -> float32 -> float64, for both inputs.
    y0 := to_float64(to_float32(to_float(std_ulogic_vector(approx), 8, 23)));
    x  := to_float64(to_float32(to_float(std_ulogic_vector(input), 8, 23)));

    -- First and second refinement in double precision.
    y1 := newton_step(y0, x);
    y2 := newton_step(y1, x);

    -- Back to single precision, then to a plain vector on the output port.
    y2_f32  := to_float32(y2);
    refined <= to_slv(y2_f32);
  end process;

end architecture Combinational;
Upvotes: 0