在有限域 $F_p$ 上,非零元素 $a$ 的逆記為 $a^{-1} \bmod p$。即在有限域 $F_p$ 上存在唯一的一個元素 $x$,使得 $ax \equiv 1 \pmod{p}$,則元素 $x$ 為 $a$ 的逆 $a^{-1}$。本次設計採用擴充套件的整數Euclidean演算法來求逆元。
擴充套件的整數Euclidean演算法可參考該網站:https://www.cnblogs.com/GjqDream/p/11537934.html
本博文主要介紹verilog實現該演算法。
根據模組化的設計思想,設計該模組介面定義如下:
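| 訊號名 | 方向 | 位寬 | 埠定義 |
| --- | --- | --- | --- |
| clk | Input | 1 | 時鐘 |
| reset | Input | 1 | 復位 |
| Inv_en | Input | 1 | 模逆使能訊號 |
| Inv_in | Input | 512 | 待求逆訊號 |
| Inv_out | Output | 256 | 模逆結果 |
| Inv_done | Output | 1 | 模逆完成標識 |

注:下方程式碼中對應的埠名稱為 mod_inv_en、in、out、mod_inv_done;另有兩個輸入埠:mod_inv_end(1 位,使狀態機由完成狀態返回初始狀態)與 params_p(256 位,模數 p)。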
二進位制擴充套件Euclidean演算法
輸入:模逆使能訊號inv_en,整數0<a<p
輸出:$a^{-1} \bmod p$
1. u=a,v=p,A=1,C=0;
2. 若 $a \neq 0$,則當 $u \neq 1$ 且 $v \neq 1$ 時重複執行步驟2.1~2.3;若 $a = 0$,直接返回C=0
2.1. 若u為偶數,重複執行2.1節
2.1.1. u=u/2。
2.1.2. 若A為偶數,則A=A/2,否則A=(A+P)/2。
2.2. 若v為偶數,重複執行2.2節
2.2.1. v=v/2。
2.2.2. 若C為偶數,則C=C/2;否則C=(C+P)/2。
2.3. 若 $u \ge v$,則u=u-v,A=A-C;否則v=v-u,C=C-A。
3. 若u=1,返回(A mod p);否則返回(C mod p)。
為驗證模逆演算法正確性,我們選取一個簡單的橢圓曲線進行驗證,所選曲線與以往演算法模組中使用的相同,其中a = 4; p = 29
選用輸入inv_in = 15,模擬結果為2,$15 \times 2 = 30 \equiv 1 \pmod{29}$,結果正確。
程式碼如下:
// mod_inv -- modular inverse, out = in^-1 (mod params_p), computed with the
// binary extended Euclidean algorithm (shift / subtract only, one step per clock).
//
// Ports
//   clk          : clock; all state advances on the rising edge.
//   reset        : synchronous, active high; returns the FSM to Init.
//   mod_inv_en   : sampled in Init; starts a computation on `in`.
//   mod_inv_end  : sampled in Finish; returns the FSM to Init.
//   in           : 512-bit operand. It is also re-read in Finish (zero check),
//                  so it must be held stable until mod_inv_end is given.
//   params_p     : 256-bit modulus. The algorithm requires it to be odd and > 1;
//                  an inverse exists only when gcd(in, params_p) == 1.
//   out          : inverse in the range [0, params_p) while in Finish, else 0.
//                  0 is also reported when `in` is 0 or has no inverse.
//   mod_inv_done : high while the FSM is in Finish.
//
// Invariants maintained by the datapath (a = in, p = params_p):
//   g1 * a == u (mod p)      g2 * a == v (mod p)      0 <= g1, g2 < p
// so when u (or v) reaches 1, g1 (or g2) is the inverse, already reduced.
//
// History: an earlier revision assumed operands below 2p (257 bits); the
// "UPDATE 11/22" revision widened the working registers to 512 bits.
module mod_inv (
    input          clk,
    input          reset,
    input          mod_inv_en,
    input          mod_inv_end,
    input  [511:0] in,
    input  [255:0] params_p,
    output [255:0] out,
    output         mod_inv_done
);

    // Modulus that used to be hard-coded here; it is now supplied on params_p.
    //parameter params_p = 256'hFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F;

    // Control signals
    reg          u_load, v_load, g1_load, g2_load, count_load;
    reg  [511:0] u_in, v_in, g1_in, g2_in;
    reg  [10:0]  count_in;
    reg          mod_inv_done_r;
    reg  [255:0] out_r;
    wire [511:0] u_out, v_out, g1_out, g2_out;
    wire [10:0]  count_out;

    // State machine states
    reg [2:0] state, next_state;
    parameter Init      = 3'd0;  // idle: working registers are (re)loaded every cycle
    parameter Start     = 3'd1;  // termination test and dispatch
    parameter Check_u   = 3'd2;  // halve u (and g1) while u is even
    parameter Check_v   = 3'd3;  // halve v (and g2) while v is even
    parameter Check_deg = 3'd4;  // compare u and v, subtract the smaller from the larger
    parameter Wait      = 3'd5;  // hold until the cycle counter reads 470
    parameter Finish    = 3'd6;  // present the result until mod_inv_end

    // Modulus zero-extended to the working width.
    wire [511:0] p_ext = {256'b0, params_p};

    // Modular differences of the coefficients. Both operands are in [0, p), so
    // adding p once before subtracting is enough to keep the result in [0, p).
    wire [511:0] g1_sub_g2 = (g1_out >= g2_out) ? (g1_out - g2_out)
                                                : (g1_out + p_ext - g2_out);
    wire [511:0] g2_sub_g1 = (g2_out >= g1_out) ? (g2_out - g1_out)
                                                : (g2_out + p_ext - g1_out);

    // Register instantiations. reg_256 is defined outside this view; it is
    // presumably a load-enabled register whose parameter is the width -- TODO confirm.
    reg_256 #(512) u (.clk(clk), .load(u_load),  .data(u_in),  .out(u_out));
    reg_256 #(512) v (.clk(clk), .load(v_load),  .data(v_in),  .out(v_out));
    reg_256 #(512) g1(.clk(clk), .load(g1_load), .data(g1_in), .out(g1_out));
    reg_256 #(512) g2(.clk(clk), .load(g2_load), .data(g2_in), .out(g2_out));
    reg_256 #(11)  counter(.clk(clk), .load(count_load), .data(count_in), .out(count_out));

    // State register
    always @(posedge clk) begin
        if (reset)
            state <= Init;
        else
            state <= next_state;
    end

    // Next-state logic
    always @(*) begin
        next_state = state;
        case (state)
            Init: begin
                if (mod_inv_en && in != 0)
                    next_state = Start;
                else if (mod_inv_en && in == 0)
                    next_state = Finish;      // 0 has no inverse: report 0 at once
                else
                    next_state = Init;
            end
            Start: begin
                if (u_out == 512'b01 || v_out == 512'b01)
                    next_state = Wait;        // inverse found
                else if (u_out[0] == 0)
                    next_state = Check_u;
                else if (v_out[0] == 0)
                    next_state = Check_v;
                else
                    next_state = Check_deg;
            end
            Check_u: begin
                if (u_out[0] == 0)
                    next_state = Check_u;
                else if (v_out[0] == 0)
                    next_state = Check_v;
                else
                    next_state = Check_deg;
            end
            Check_v: begin
                if (v_out[0] == 0)
                    next_state = Check_v;
                else
                    next_state = Check_deg;
            end
            Check_deg: begin
                // u == v (both != 1 here) means in and params_p share a factor:
                // no inverse exists. Leave the loop instead of spinning forever.
                if (u_out == v_out)
                    next_state = Wait;
                else
                    next_state = Start;
            end
            Wait: begin
                // Exact match on an 11-bit free-running counter: if the
                // computation took more than 470 cycles, this waits for the
                // counter to wrap around and reach 470 again.
                // NOTE(review): confirm whether that fixed latency is intended.
                if (count_out == 11'd470)
                    next_state = Finish;
            end
            Finish:
                next_state = mod_inv_end ? Init : Finish;
            default:
                next_state = Init;
        endcase
    end

    // Datapath / output logic
    always @(*) begin
        // Defaults: hold every working register, count every cycle.
        u_in           = u_out;
        v_in           = v_out;
        g1_in          = g1_out;
        g2_in          = g2_out;
        u_load         = 1'b0;
        v_load         = 1'b0;
        g1_load        = 1'b0;
        g2_load        = 1'b0;
        out_r          = 256'b0;
        mod_inv_done_r = 1'b0;
        count_load     = 1'b1;
        count_in       = count_out + 1;

        case (state)
            Init: begin
                // u = a, v = p, g1 = 1, g2 = 0; restart the cycle counter.
                u_in     = in;
                v_in     = params_p;
                g1_in    = 512'b01;
                g2_in    = 512'b0;
                u_load   = 1'b1;
                v_load   = 1'b1;
                g1_load  = 1'b1;
                g2_load  = 1'b1;
                count_in = 0;
            end
            Start: begin end
            Check_u: begin
                // u even: u = u/2 and g1 = g1/2 (mod p). When g1 is odd, add
                // the odd modulus first so the shift is an exact division.
                u_in = u_out >> 1;
                if (g1_out[0] == 0)
                    g1_in = g1_out >> 1;
                else
                    g1_in = (g1_out + p_ext) >> 1;
                if (u_out[0] == 0) begin
                    u_load  = 1'b1;
                    g1_load = 1'b1;
                end
            end
            Check_v: begin
                // Same halving step for v and g2.
                v_in = v_out >> 1;
                if (g2_out[0] == 0)
                    g2_in = g2_out >> 1;
                else
                    g2_in = (g2_out + p_ext) >> 1;
                if (v_out[0] == 0) begin
                    v_load  = 1'b1;
                    g2_load = 1'b1;
                end
            end
            Check_deg: begin
                // Subtract the smaller of u, v from the larger (and the matching
                // coefficient, modulo p). Subtraction cannot overflow the
                // 512-bit registers and keeps g1, g2 inside [0, p).
                // When u == v nothing is loaded; the FSM is leaving the loop.
                if (u_out != v_out) begin
                    if (u_out > v_out) begin
                        u_in    = u_out - v_out;
                        g1_in   = g1_sub_g2;
                        u_load  = 1'b1;
                        g1_load = 1'b1;
                    end
                    else begin
                        v_in    = v_out - u_out;
                        g2_in   = g2_sub_g1;
                        v_load  = 1'b1;
                        g2_load = 1'b1;
                    end
                end
            end
            Wait: begin end   // the default counter increment is all that is needed
            Finish: begin
                mod_inv_done_r = 1'b1;
                if (in == 0)
                    out_r = 0;
                else if (u_out == 512'b01)
                    out_r = g1_out[255:0];    // g1 * a == 1 (mod p)
                else if (v_out == 512'b01)
                    out_r = g2_out[255:0];    // g2 * a == 1 (mod p)
                else
                    out_r = 256'b0;           // no inverse exists
            end
            default: begin end
        endcase
    end

    assign out          = (state == Finish) ? out_r : 0;
    assign mod_inv_done = (state == Finish) ? mod_inv_done_r : 0;
endmodule