いまさらSpartan-3A Starter Kitを入手しました。
http://kozos.jp/fpga/spartan3a.html
まずはTD4を動かすのにPCでシミュレーションをやってみます。
https://qiita.com/asfdrwe/items/8dd955240b8291346384
リンク先の記事では実機としてTang Nanoで実行していましたが、せっかくなのでSpartan-3Aで実行してみました。
td4.v
top_td4.v
prescaler.v
こちらを参考にUCFを記載しました。
https://propella.hatenablog.com/entry/20080616/p1
# Pin constraints for the top_td4 ports (CLOCK, IN[3:0], OUT[3:0], RESET).

# Clock input.
NET "CLOCK" LOC = "E12" | IOSTANDARD = LVCMOS33 ;

# 4-bit input port, pull-ups enabled.
NET "IN<0>" LOC = "V8"  | IOSTANDARD = LVTTL | PULLUP ;
NET "IN<1>" LOC = "U10" | IOSTANDARD = LVTTL | PULLUP ;
NET "IN<2>" LOC = "U8"  | IOSTANDARD = LVTTL | PULLUP ;
NET "IN<3>" LOC = "T9"  | IOSTANDARD = LVTTL | PULLUP ;

# 4-bit output port (presumably the on-board LEDs - confirm against the board manual).
NET "OUT<3>" LOC = "U19" | IOSTANDARD = LVTTL | SLEW = QUIETIO | DRIVE = 4 ;
NET "OUT<2>" LOC = "U20" | IOSTANDARD = LVTTL | SLEW = QUIETIO | DRIVE = 4 ;
NET "OUT<1>" LOC = "T19" | IOSTANDARD = LVTTL | SLEW = QUIETIO | DRIVE = 4 ;
NET "OUT<0>" LOC = "R20" | IOSTANDARD = LVTTL | SLEW = QUIETIO | DRIVE = 4 ;

# Reset input with pull-down, so the pin reads low when not driven.
NET "RESET" LOC = "T15" | IOSTANDARD = LVTTL | PULLDOWN ; # SOUTH BUTTON
書籍「作ろう!CPU」を参考にtop_td4.vとprescaler.vを追加します。
td4.v
// TD4 4-bit CPU core with a built-in 16 x 8-bit program ROM.
//
// Ports:
//   CLOCK   - all state registers update on the rising edge
//   RESET   - asynchronous reset, active low
//   IN      - 4-bit input port, selectable as the ALU operand
//   reg_out - 4-bit output port register
//
// Every opcode is {opcode[7:4] = operation, opcode[3:0] = immediate}.
// The single "ALU" is a 4-bit adder: selected operand + immediate.
module td4 (
  input wire CLOCK,
  input wire RESET,
  input wire [3:0] IN,
  output reg [3:0] reg_out);
  reg [3:0] reg_a, reg_b;      // general purpose registers A and B
  reg [3:0] pc = 4'b0;         // program counter (ROM address)
  reg cflag = 1'b1;            // carry flag, stored inverted (1 = no carry)
  reg [7:0] rom[0:15];         // program memory
//  initial $readmemb("ROM.bin", rom);
  initial begin
      rom[0] =  8'b10110111; // OUT 0111   # LED
      rom[1] =  8'b00000001; // ADD A,0001
      rom[2] =  8'b11100001; // JNC 0001   # loop 16 times
      rom[3] =  8'b00000001; // ADD A,0001
      rom[4] =  8'b11100011; // JNC 0011   # loop 16 times
      rom[5] =  8'b10110110; // OUT 0110   # LED
      rom[6] =  8'b00000001; // ADD A,0001
      rom[7] =  8'b11100110; // JNC 0110   # loop 16 times
      rom[8] =  8'b00000001; // ADD A,0001
      rom[9] =  8'b11101000; // JNC 1000   # loop 16 times
      rom[10] = 8'b10110000; // OUT 0000   # LED
      rom[11] = 8'b10110100; // OUT 0100   # LED
      rom[12] = 8'b00000001; // ADD A,0001
      rom[13] = 8'b11101010; // JNC 1010   # loop 16 times
      rom[14] = 8'b10111000; // OUT 1000   # LED
      rom[15] = 8'b11111111; // JMP 1111
  end

  // ---- Fetch -------------------------------------------------------------
  wire [7:0] opcode;
  assign opcode = rom[pc];

  // ---- Decode ------------------------------------------------------------
  wire [1:0] alu_sel, load_sel;
  wire jmp;
  wire [3:0] im; // IMMEDIATE
  // Jump opcodes (opcode[7:6] == 11) force the zero operand, so the adder
  // output equals the immediate, i.e. the jump target.
  assign alu_sel = (opcode[7:6] == 2'b11) ? 2'b11 : opcode[5:4];
  assign load_sel = opcode[7:6];  // destination: 00 = A, 01 = B, 10 = OUT, 11 = PC
  assign jmp = opcode[4];         // within jump opcodes: 1 = unconditional
  assign im = opcode[3:0];

  // ---- Operand select ----------------------------------------------------
  wire [3:0] alu_in;
  assign alu_in = (alu_sel == 2'b00) ? reg_a : // from A
                  (alu_sel == 2'b01) ? reg_b : // from B
                  (alu_sel == 2'b10) ? IN : // from input port
                                       4'b0000; // zero

  // ---- Adder -------------------------------------------------------------
  wire [3:0] alu_out;
  wire nextcflag;                 // carry out of the 4-bit addition
  assign {nextcflag, alu_out} = alu_in + im;

  // ---- Load enables (all active low) -------------------------------------
  wire load_a, load_b, load_out, load_pc;
  assign load_a = (load_sel == 2'b00) ? 1'b0 : 1'b1; // negative logic
  assign load_b = (load_sel == 2'b01) ? 1'b0 : 1'b1; // negative logic
  assign load_out = (load_sel == 2'b10) ? 1'b0 : 1'b1; // negative logic
  // PC is loaded on an unconditional jump, or on a conditional jump while
  // cflag is 1 (the previous instruction produced no carry).
  assign load_pc = (load_sel == 2'b11 && (jmp == 1'b1 || cflag)) ? 1'b0 : 1'b1; // negative logic

  // ---- Next program counter ----------------------------------------------
  wire [3:0] next_pc;
  assign next_pc = (load_pc == 1'b0) ? alu_out : pc + 1;

  // ---- State update ------------------------------------------------------
  // The #1 intra-assignment delays only affect simulation; they are expressed
  // in the time unit of whatever `timescale is in effect for this file.
  always @(posedge CLOCK or negedge RESET) begin
    if (!RESET) begin
      reg_a <= 4'b0;
      reg_b <= 4'b0;
      reg_out <= 4'b0;
      cflag <= 1'b1;
      pc <= 4'b0;
    end else begin
      reg_a <= #1 (load_a == 1'b0) ? alu_out : reg_a;
      reg_b <= #1 (load_b == 1'b0) ? alu_out : reg_b;
      reg_out <= #1 (load_out == 1'b0) ? alu_out : reg_out;
      cflag  <= #1 ~nextcflag; // negative logic carry, updated by every instruction
      pc <= #1 next_pc;
    end
  end
endmodule
top_td4.v
`timescale 1ns / 1ps
// Board-level wrapper: divides the incoming clock with a prescaler and feeds
// the slowed clock, the inverted reset and the I/O ports to the td4 core.
module top_td4(
            input CLOCK,
            input RESET,
            input [3:0] IN,
            output [3:0] OUT
            );
   // Divided clock that drives the CPU core.
   wire td4_CLOCK;

   // The core is given the inverse of the RESET input.
   wire core_reset_n = ~RESET;

   // One slow clock period per 10,000,000 input clock periods.
   prescaler #(.RATIO(10_000_000)) prescaler(
    .slow_clock(td4_CLOCK),
    .quick_clock(CLOCK)
   );

   td4 td4_0(
    .CLOCK(td4_CLOCK),
    .RESET(core_reset_n),
    .IN(IN),
    .reg_out(OUT)
   );
endmodule
prescaler.v
// Clock divider.
//
// slow_clock toggles once every RATIO/2 rising edges of quick_clock, so for an
// even RATIO one slow_clock period lasts RATIO quick_clock periods.
//
// Parameters:
//   RATIO - division ratio; values below 2 are treated as 2.
// Ports:
//   quick_clock - input clock
//   slow_clock  - divided output clock, starts low
module prescaler #(parameter RATIO = 2) (
  input  wire quick_clock,
  output reg slow_clock
);
  // quick_clock cycles per slow_clock half period. Clamped to at least 1 so
  // that RATIO < 2 cannot make the compare value wrap around to 32'hFFFFFFFF.
  localparam HALF_PERIOD = (RATIO < 2) ? 1 : (RATIO / 2);

  reg [31:0] counter;

  // Define the start-up state. Without it both registers are X in simulation
  // and, because every next-state value depends on them, would stay X forever.
  initial begin
    counter    = 32'd0;
    slow_clock = 1'b0;
  end

  // High during the last quick_clock cycle of each half period.
  wire inv;
  assign inv = (counter == (HALF_PERIOD - 1));

  always @(posedge quick_clock) begin
    counter <= inv ? 32'd0 : counter + 32'd1;
    if (inv) slow_clock <= ~slow_clock;
  end
endmodule
PCでシミュレーションもやってみます。あえてtop_td4.vではなくtd4.vを対象とします。
test_td4.v
`timescale 1ns/1ps
// Simulation testbench for the td4 core: generates clock and reset, dumps a
// VCD file and prints the input and output ports whenever they change.
module test_td4;
  reg clock = 1'b0;
  reg reset_n = 1'b0;          // active-low reset, asserted from time 0
  reg [3:0] reg_in = 4'b0000;  // value driven onto the core's input port
  wire [3:0] pc_out;           // despite the name, this is the core's output port
  td4 td4_1(clock, reset_n, reg_in, pc_out);

  // Waveform dump and console trace.
  initial begin
    $dumpfile("TD4.vcd");
    $dumpvars(0, test_td4);
    $monitor("%t: in_port = %h, out_port = %h", $time, reg_in, pc_out);
  end

  // Free-running clock: toggles every time unit, rising edges at t = 1, 3, 5, ...
  initial begin
    clock = 1'b0;
    forever begin
      #1 clock = ~clock;
    end
  end

  // Reset sequencing. Reset stays asserted across the rising edge at t = 1 so
  // the core is reset regardless of event ordering, and is released at t = 2
  // (a falling clock edge) so the release never races with a rising edge.
  initial begin
    reset_n = 1'b0;
    #2 reset_n = 1'b1;
    #10000 $finish;
  end
endmodule