Reputation: 31
I designed a single layer perceptron for a lab I need to finish. It is working perfectly as expected, and I am receiving the expected output compared to a testbench given to us. The only issue is that the output is exactly one clock too late compared to the testbench, and this is causing error log messages to appear. I have tried a lot to find where I went wrong, so any help is appreciated. Verilog and testbench code will be given below.
`timescale 1ns / 1ps
module perceptron(
input rst_n,
input clk,
input signed [7:0] x1,
input signed [7:0] x2,
input valid_in,
output reg y,
output reg y_valid
);
reg signed [7:0] w1 = 8'sb00000010; //weight 1
reg signed [7:0] w2 = 8'sb11111110; //weight 2
reg signed [7:0] b = 8'sb11111101; //bias
//stage 1: p1 = w1*x1 and p2 = w2*x2.
//stage 2: s = p1 + p2 + b
reg valid_pipe;
reg signed [15:0] p1;
reg signed [15:0] p2;
reg signed [15:0] s;
always @(posedge clk) begin
if (!rst_n) begin
y <= 1'b0;
// y_valid <= 1'b0;
// w1 <= 8'b0;
// w2 <= 8'b0;
// b <= 8'b0;
valid_pipe <= 1'b0;
// p1 <= 16'b0;
// p2 <= 16'b0;
// s <= 16'b0;
end
else begin
//->first pipe cycle
valid_pipe <= valid_in;
p1 <= w1 * x1;
p2 <= w2 * x2;
//valid_pipe <= valid_in;
//-->second pipe cycle
//y_valid <= valid_pipe;
s <= p1 + p2 + b;
y <= (s >= 0) ? 1'b1 : 1'b0;
y_valid <= valid_pipe;
end
end
endmodule
Testbench:
`timescale 1ns / 1ps
module tb_perceptron(
);
reg clk;
reg rst_n;
wire signed [7:0] x1;
wire signed [7:0] x2;
wire y;
wire tb_y;
reg valid_in;
wire y_valid;
reg [3:0] addr_in, addr_y;
perceptron dut (.*);
rom #( .addr_width (4), .data_width (8), .init_file("x1.dat") )
x1_mem(
.addr(addr_in),
.data (x1)
);
rom #( .addr_width (4), .data_width (8), .init_file("x2.dat") )
x2_mem(
.addr(addr_in),
.data (x2)
);
rom #( .addr_width (4), .data_width (1), .init_file("y.dat") )
y_mem(
.addr(addr_y),
.data (tb_y)
);
always #5 clk = ~clk;
//integer file_handle;
initial
begin
//file_handle = $fopen("output.txt", "w");
//i edit here for delay for check.
//#5;
clk = 0;
rst_n = 1'h0;
valid_in = 0;
#73 rst_n = 1'h1;
#17;
addr_in = 4'h0;
#20;
valid_in = 1;
for (integer i = 0; i < 16; i = i + 1)
begin
#10;
//assert (y == tb_y);
addr_in = addr_in + 1;
end
valid_in = 0;
#50;
//$fclose(file_handle);
end
always_ff @ (posedge clk)
begin
if (!rst_n)
begin
addr_y <= #0.1 4'h0;
end
else if (y_valid)
begin
//$fwrite(file_handle, "%b\n", y);
assert (y == tb_y)
else $error("y not equal to tb_y");
addr_y <= #0.1 addr_y + 1;
end
end
endmodule
ROM file:
`timescale 1ns/1ps
module rom #(addr_width = 4, data_width = 4, string init_file = "dummy.dat" )
(
input [addr_width-1:0]addr,
output [data_width-1:0]data
);
reg [data_width-1:0] mem [ (1<<addr_width)-1:0];
initial
begin
$readmemb (init_file, mem);
/*for (integer i = 0; i < 16; i = i + 1)
begin
mem[i] = 4'hf - i;
end*/
end
assign data = mem[addr];
endmodule
x1 dat:
00000101
11111100
00000000
00001100
11111100
00000100
00000100
00000100
11111100
11111100
00100000
00000100
11111100
11111100
00000000
00000100
x2 dat:
11111010
00000100
00000000
11111100
00000100
11111100
11111100
00000100
11111100
00000100
11111100
11111100
00000000
11110000
00000100
11111100
y dat:
1
0
0
1
0
1
1
0
0
0
1
1
0
1
0
1
I have tried moving around valid_in and asserting y_valid at different stages. Adding manual delays didn't help that much. I might have an error in the pipelining / sequential logic, so that might be the cause also. I am using AMD vivado.
Upvotes: 0
Views: 125
Reputation: 647
Your pipeline stages are not same for y
and y_valid
in the else block of your module.
else begin
valid_pipe <= valid_in;
p1 <= w1 * x1;
p2 <= w2 * x2;
s <= p1 + p2 + b;
y <= (s >= 0) ? 1'b1 : 1'b0;
y_valid <= valid_pipe;
end
In the above simplified snippet, you can see that y
takes 3 cycle (x1-->p1-->s-->y) to get to output and y_valid
only take 2 cycles (valid_in-->valid_pipe -->y_valid).
And in the testbench you are using y_valid
to capture the y
with y_tb
.
Solution is to either remove one pipeline form y
path OR second solution is to add one pipeline stage in y_valid
path.
else begin
valid_pipe <= valid_in;
p1 <= w1 * x1;
p2 <= w2 * x2;
s <= p1 + p2 + b;
y <= (s >= 0) ? 1'b1 : 1'b0;
y_valid_reg <= valid_pipe; //Additional Pipeline stage on valid
y_valid <= y_valid_reg ;
end
add reg y_valid_reg;
at the top as well.
Upvotes: 1