Форум разработчиков электроники ELECTRONIX.ru

Помощь - Поиск - Пользователи - Календарь

Полная версия этой страницы: ДКП

Форум разработчиков электроники ELECTRONIX.ru > Программируемая логика ПЛИС (FPGA,CPLD, PLD) > Работаем с ПЛИС, области применения, выбор

Kocapb

Dec 14 2007, 09:35

Здравствуйте, я новичок, студент 2-го курса, знакомлюсь с программированием ПЛИС, делаю работу по ДКП.
Читал тут тему про ДКП http://electronix.ru/forum/index.php?showtopic=4132, но, к сожалению, там ни одна ссылка не работает.
Хотелось бы найти ссылки на IP-блоки для 8-точечного ДКП, или, если кто поможет, разобраться с логической схемой подключения 8 MAC-ов.
Было бы интересно посмотреть код 8-точечного ДКП на Verilog. В общем, буду благодарен любой помощи, можно ссылки на литературу!

ClockworkOrange

Dec 14 2007, 10:34

>> Читал тут тему про дкп http://electronix.ru/forum/index.php?showtopic=4132 но с сожелению там не 1 ссылка не работает.

Если вас интересует практическая реализация описанных там алгоритмов в FPGA и ваше применение совместимо с лицензией GNU/GPL, то вы можете загрузить код на Verilog-е (для Xilinx Spartan 3) здесь

Kocapb

Dec 14 2007, 10:57

Извиняюсь, но не могу найти там ДКП: плохо ориентируюсь на нерусских сайтах.

ClockworkOrange

Dec 14 2007, 11:07

нда..

я так полагаю следующим вопросом будет что делать с файлом, имеющим расширение .tar.gz ??

Kocapb

Dec 14 2007, 11:20

Да, я чайник в этом деле. Кроме MAX+plus II ни с чем не работал, на Verilog писал только простейшие компоненты (сумматор, АЛУ, регистр и т. п.). Многого не знаю, но хочу узнать; доступ к литературе ограничен.

ClockworkOrange

Dec 14 2007, 11:39

CODE

/**********************************************************************
** -----------------------------------------------------------------------------**
** xdct.v
**
** 8x8 discrete Cosine Transform
**
** Copyright 2002 Andrey Filippov
**
** -----------------------------------------------------------------------------**
**  X313 is free software - hardware description language (HDL) code; you can
**  redistribute it and/or modify it under the terms of the GNU General Public License
**  as published by the Free Software Foundation; either version 2 of the License, or
**  (at your option) any later version.
**
***********************************************************************/

/*
After I added DC subtraction before the DCT I got 9-bit signed data (although it is not likely to go out of the 8-bit range).
I also increased the transpose memory to 9 bits (it is 16 bits wide anyway) to see if that helps to prevent saturation
without a significant increase in gates.

Saturation is still visible on real pictures, but there was a bug: addsub<i>a_comp and addsub<i>b_comp were not using their
MSB. I added 1 more bit to add_sub<i>a and add_sub<i>b and fixed that bug. Only 2 more slices were used.

*/


`timescale 1ns/1ps

// xdct - top level of the 8x8 discrete cosine transform.
//
// Structure (all visible in this module):
//   dct_stage1      -> first pass; writes 10-bit results into the transpose memory
//   RAMB4_S16_S16   -> dual-port block RAM used as transpose memory
//                      (port A = write side from stage 1, port B = read side to stage 2)
//   dct_stage2      -> second pass; reads the transpose memory and drives d_out / dv
//
// The 7-bit read/write addresses are zero-extended to the 8-bit RAM address, and the
// 10-bit stage-1 result is zero-extended to the 16-bit RAM data width.
module xdct   ( clk,            // clock
                en,             // if zero will reset transpose memory page numbers
                start,          // single-cycle start pulse that goes with the first pixel data; the other 63 should follow
                xin,            // [8:0]  input data
                last_in,        // high during input of the last of 64 pixels in an 8x8 block
                pre_first_out,  // 1 cycle ahead of the first output in a 64-sample block
                dv,             // data output valid (original note: goes high on the 94th cycle after start)
                d_out);         // [11:0] output data

  // ---- port directions -------------------------------------------------
  input         clk;
  input         en;
  input         start;
  input  [8:0]  xin;
  output        last_in;
  output        pre_first_out;
  output        dv;
  output [11:0] d_out;

  reg           last_in;

  // ---- stage 1 -> transpose memory (write side) ------------------------
  wire          tm_we;        // write enable
  wire   [6:0]  tm_wa;        // write address
  wire   [9:0]  tm_di;        // write data
  wire          tm_page;      // page reported by stage 1
  wire          stage1_done;  // stage 1 "done", used as the start of stage 2

  // ---- transpose memory -> stage 2 (read side) -------------------------
  wire   [6:0]  tm_ra;        // read address
  wire  [15:0]  tm_out;       // RAM port B output; only tm_out[9:0] is passed to stage 2

  // last_in is a registered compare of the low six write-address bits against 6'h30.
  always @ (posedge clk)
    begin
      last_in <= (tm_wa[5:0] == 6'h30);
    end

  // First pass.
  dct_stage1 i_dct_stage1 (
      .clk     (clk),
      .en      (en),
      .start   (start),
      .xin     (xin),          // [8:0]
      .we      (tm_we),        // write to transpose memory
      .wr_cntr (tm_wa),        // [6:0] transpose memory write address
      .z_out   (tm_di),        // [9:0] data to transpose memory
      .page    (tm_page),
      .done    (stage1_done)
  );

  // Transpose memory. Port A output (DOA) is intentionally left unconnected;
  // port B is read-only (WEB tied low, DIB tied to zero).
  RAMB4_S16_S16 i_transpose_mem (
      // port A: written by stage 1
      .CLKA  (clk),
      .ENA   (1'b1),
      .RSTA  (1'b0),
      .WEA   (tm_we),
      .ADDRA ({1'b0, tm_wa[6:0]}),
      .DIA   ({6'b0, tm_di[9:0]}),
      // port B: read by stage 2
      .CLKB  (clk),
      .ENB   (1'b1),
      .RSTB  (1'b0),
      .WEB   (1'b0),
      .ADDRB ({1'b0, tm_ra[6:0]}),
      .DIB   (16'b0),
      .DOB   (tm_out[15:0])
  );

  // Second pass.
  dct_stage2 i_dct_stage2 (
      .clk      (clk),
      .en       (en),
      .start    (stage1_done),   // stage 1 finished, data available in transpose memory
      .page     (tm_page),       // transpose memory page finished, valid at start
      .rd_cntr  (tm_ra[6:0]),    // [6:0] transpose memory read address
      .tdin     (tm_out[9:0]),   // [9:0] data from transpose memory
      .endv     (pre_first_out),
      .dv       (dv),            // data output valid
      .dct2_out (d_out[11:0])    // [11:0] output data
  );

endmodule

// 01/24/2004: Moved all clocks in stage 1 to "negedge" to reduce current pulses

// dct_stage1 - first 1-D pass of the 8x8 DCT.
//
// Data path, in the order it appears below:
//   1. xin is shifted through an 8-deep register chain (xa0_in..xa7_in).
//   2. When sxregs is high the chain is copied into xa0_reg..xa7_reg.
//   3. Mirrored pairs (7/0, 6/1, 5/2, 4/3) are added when toggleA is 1 and
//      subtracted when toggleA is 0.
//   4. The result is converted to sign (save_sign*) + magnitude (addsub*_comp)
//      and multiplied by the magnitude of the coefficient selected by indexi.
//   5. The product sign is restored, the four products are summed, the sum is
//      rounded by dropping 8 LSBs, and the 10-bit result is presented on z_out.
//
// Every always block here is clocked on negedge clk. The outputs (z_out,
// wr_cntr, done, we, page) are taken from the *_prelatch signals through LD
// latches whose gate input is clk.
// NOTE(review): SRL16_1 is presumably the negative-edge-clocked variant of
// SRL16 - confirm against the Xilinx libraries guide.
module dct_stage1 ( clk,
						  en,
						  start,	  // single-cycle start pulse to replace RST
						  xin,	  // [8:0] input samples
						  we,		  // write to transpose memory
						  wr_cntr, // [6:0]	transpose memory write address
						  z_out,	  // [9:0] data to transpose memory
						  page,	// transpose memory page just filled (valid @ done)
						  done);   // last cycle writing to transpose memory - may use after it (move it earlier?)
  input			clk;
  input			en,start;	
  input	[8:0] xin;
  output			we;
  output	[6:0] wr_cntr;
  output [9:0] z_out;
  output			page;
  output			done;

/* constants */

// Coefficient registers: bit 7 is a "negative" flag, bits 6:0 are the magnitude
// (the multipliers below use [6:0]; the sign is applied after the multiply).
reg[7:0] memory1a, memory2a, memory3a, memory4a;

/* 1D section */
/* The max value of a pixel after processing (to make their expected mean to zero)
is 127. If all the values in a row are 127, the max value of the product terms
would be (127*2)*(23170/256) and that of z_out_int would be (127*8)*23170/256.
This value divided by 2raised to 8 is equivalent to ignoring the 8 lsb bits of the value */
// NOTE(review): the 23170 figure above does not correspond to the 7-bit
// coefficient magnitudes used in this module; it looks like a leftover from an
// earlier version - confirm.

reg[8:0] xa0_in, xa1_in, xa2_in, xa3_in, xa4_in, xa5_in, xa6_in, xa7_in;          // input shift chain
reg[8:0] xa0_reg, xa1_reg, xa2_reg, xa3_reg, xa4_reg, xa5_reg, xa6_reg, xa7_reg;  // snapshot of the chain taken on sxregs
reg[8:0] addsub1a_comp,addsub2a_comp,addsub3a_comp,addsub4a_comp;                 // magnitude of add_sub*a
reg[9:0] add_sub1a,add_sub2a,add_sub3a,add_sub4a;                                 // signed sum/difference of a mirrored pair
reg save_sign1a, save_sign2a, save_sign3a, save_sign4a;                           // sign bit of add_sub*a, delayed with the magnitude
reg[16:0] p1a,p2a,p3a,p4a;                                                        // signed products
wire[35:0] p1a_all,p2a_all,p3a_all,p4a_all;                                       // raw multiplier outputs (only [16:0] is used)
reg toggleA;                                                                      // 1 = add, 0 = subtract


//reg[16:0] z_out_int1,z_out_int2;
//reg[16:0] z_out_int;

reg[17:0] z_out_int1,z_out_int2;   // partial sums of two products each
reg[17:0] z_out_int;               // sum of all four products

wire[9:0] z_out_rnd;               // z_out_int with the 8 LSBs dropped
wire[9:0] z_out_prelatch;          // rounded result, before the output latches
reg [2:0] indexi;                  // coefficient table index

/* clks and counters */
reg [6:0] wr_cntr_prelatch;        // transpose memory write address, before the output latches

/* memory section */

reg done_prelatch;
reg we_prelatch;
wire enwe;                         // delayed pre_sxregs; starts the write-enable window
wire	pre_sxregs;                  // delayed start pulse
reg	sxregs;                      // "snapshot the shift chain" strobe
reg page_prelatch;

// outputs from output latches to cross clock edge boundary
wire[9:0] z_out;
wire[6:0] wr_cntr;
wire done;
wire we;
wire page;


// to conserve energy by disabling toggleA

wire	sxregs_d8;
reg	enable_toggle;
  SRL16_1 i_sxregs_d8   (.Q(sxregs_d8), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk),.D(sxregs));	// dly=7+1
  // enable_toggle is set by sxregs and stays set until sxregs_d8 arrives (or en goes low)
  always @ (negedge clk) enable_toggle <= en && (sxregs || (enable_toggle && !sxregs_d8));

 // done_prelatch: registered "low six address bits are all ones"
 always @ (negedge clk) done_prelatch<= (wr_cntr_prelatch[5:0]==6'h3f);
 // page_prelatch: captures address bit 6 at the same moment
 always @ (negedge clk) if (wr_cntr_prelatch[5:0]==6'h3f) page_prelatch <= wr_cntr_prelatch[6];

 // we_prelatch: set by enwe, then held while en is high and the low six address bits are not 6'h3f
 always @ (negedge clk) we_prelatch<= enwe || (en && we_prelatch && (wr_cntr_prelatch[5:0]!=6'h3f));

 // write address: cleared while en is low, increments while we_prelatch is high
 always @ (negedge clk )
   if     (!en) wr_cntr_prelatch <= 7'b0;
   else if (we_prelatch) wr_cntr_prelatch <= wr_cntr_prelatch + 1;
 SRL16_1 i_pre_sxregs (.Q(pre_sxregs), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk), .D(start));	// dly=6+1
 SRL16_1 i_enwe       (.Q(enwe), .A0(1'b1), .A1(1'b0), .A2(1'b1), .A3(1'b0), .CLK(clk), .D(pre_sxregs));	// dly=5+1

 // sxregs: the delayed start pulse, or address[2:0]==1 in every group of eight except the one where address[5:3]==7
 always @ (negedge clk ) sxregs <= pre_sxregs || ((wr_cntr_prelatch[2:0]==3'h1) && (wr_cntr_prelatch[5:3]!=3'h7));



//always @ (negedge clk) toggleA <= sxregs || (~toggleA);
  // toggleA is forced to 1 by sxregs, alternates while enable_toggle is high, and is 0 otherwise
  always @ (negedge clk) toggleA <= sxregs || (enable_toggle && (~toggleA));



// indexi is loaded with 7 on sxregs and then increments (wrapping to 0) while enable_toggle is high
always @ (negedge clk)
   if (sxregs) indexi <= 3'h7;
//	else indexi<=indexi+1;
	else if (enable_toggle) indexi<=indexi+1;


/*  1D-DCT BEGIN */

// store 1D-DCT constant coefficient values for multipliers
// Encoding: {negative flag, 7-bit magnitude}. Negative entries are written as two
// partial assignments (bit 7 and bits 6:0); the trailing "//-8'dNN" remarks show
// the signed value each entry stands for.
// The magnitudes 126,118,106,91,71,49,25 equal round(128*cos(k*pi/16)) for k=1..7.

always @ (negedge clk)
   begin
	     case (indexi)
         0 : begin memory1a <= 8'd91; 
                   memory2a <= 8'd91; 
                   memory3a <= 8'd91; 
                   memory4a <= 8'd91;end
         1 : begin memory1a <= 8'd126; 
                   memory2a <= 8'd106;  
                   memory3a <= 8'd71;  
                   memory4a <= 8'd25;end
         2 : begin memory1a <= 8'd118; 
                   memory2a <= 8'd49;  
                   memory3a[7] <= 1'b1; memory3a[6:0] <= 7'd49;//-8'd49; 
                   memory4a[7] <= 1'b1; memory4a[6:0] <= 7'd118;// end -8'd118;end
                   end
         3 : begin memory1a <= 8'd106; 
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd25;//-8'd25;  
                   memory3a[7] <= 1'b1; memory3a[6:0] <= 7'd126;//-8'd126; 
                   memory4a[7] <= 1'b1; memory4a[6:0] <= 7'd71;end//-8'd71;end
         4 : begin memory1a <= 8'd91; 
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd91;//-8'd91; 
                   memory3a[7] <= 1'b1; memory3a[6:0] <= 7'd91;//-8'd91; 
                   memory4a <= 8'd91;end
         5 : begin memory1a <= 8'd71; 
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd126;//-8'd126; 
                   memory3a <= 8'd25;   
                   memory4a <= 8'd106;end
         6 : begin memory1a <= 8'd49; 
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd118;//-8'd118; 
                   memory3a <= 8'd118;  
                   memory4a[7] <= 1'b1; memory4a[6:0] <= 7'd49;end//-8'd49;end
         7 : begin memory1a <= 8'd25;  
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd71;//-8'd71; 
                   memory3a <= 8'd106;  
                   memory4a[7] <= 1'b1; memory4a[6:0] <= 7'd126;end//-8'd126;end
       endcase
end

/* 9-bit input shifted 8 times thru a shift register*/
// xa0_in will see output registers from posedge, may be replaced by latches if needed - but currently delay is under 5ns
always @ (negedge clk)
   begin
       xa0_in <= xin; xa1_in <= xa0_in; xa2_in <= xa1_in; xa3_in <= xa2_in;
       xa4_in <= xa3_in; xa5_in <= xa4_in; xa6_in <= xa5_in; xa7_in <= xa6_in;
   end

/* shifted inputs are captured whenever sxregs is high */

always @ (negedge clk)
    if (sxregs)
       begin 
       xa0_reg <= {xa0_in}; xa1_reg <= {xa1_in}; 
       xa2_reg <= {xa2_in}; xa3_reg <= {xa3_in};
       xa4_reg <= {xa4_in}; xa5_reg <= {xa5_in}; 
       xa6_reg <= {xa6_in}; xa7_reg <= {xa7_in};
       end

/* adder / subtractor block */
// Operands are sign-extended from 9 to 10 bits so the sum/difference cannot overflow.
always @ (negedge clk)
       if (toggleA == 1'b1) begin
           add_sub1a <= ({xa7_reg[8],xa7_reg[8:0]} + {xa0_reg[8],xa0_reg[8:0]});
			  add_sub2a <= ({xa6_reg[8],xa6_reg[8:0]} + {xa1_reg[8],xa1_reg[8:0]});
           add_sub3a <= ({xa5_reg[8],xa5_reg[8:0]} + {xa2_reg[8],xa2_reg[8:0]});
			  add_sub4a <= ({xa4_reg[8],xa4_reg[8:0]} + {xa3_reg[8],xa3_reg[8:0]});
       end else begin
           add_sub1a <= ({xa7_reg[8],xa7_reg[8:0]} - {xa0_reg[8],xa0_reg[8:0]});
			  add_sub2a <= ({xa6_reg[8],xa6_reg[8:0]} - {xa1_reg[8],xa1_reg[8:0]});
           add_sub3a <= ({xa5_reg[8],xa5_reg[8:0]} - {xa2_reg[8],xa2_reg[8:0]});
			  add_sub4a <= ({xa4_reg[8],xa4_reg[8:0]} - {xa3_reg[8],xa3_reg[8:0]});
       end

// First valid add_sub appears at the 10th clk (8 clks for shifting inputs,
// 9th clk for registering shifted input and 10th clk for add_sub
// to synchronize the i value to the add_sub value, i value is incremented
// only after 10 clks
// Sign/magnitude split: the sign bit is saved and the value is negated when negative.
// NOTE(review): add_sub*a is 10 bits but addsub*a_comp is 9 bits, so the
// assignment drops bit 9; a magnitude of 512 (add_sub == -512) would be stored
// as 0. Presumably unreachable with the expected input range - confirm.
always @ (negedge clk) begin
		 save_sign1a	<= add_sub1a[9];
		 save_sign2a	<= add_sub2a[9];
		 save_sign3a	<= add_sub3a[9];
		 save_sign4a	<= add_sub4a[9];
		 addsub1a_comp	<= add_sub1a[9]? (-add_sub1a) : add_sub1a;
		 addsub2a_comp	<= add_sub2a[9]? (-add_sub2a) : add_sub2a;
		 addsub3a_comp	<= add_sub3a[9]? (-add_sub3a) : add_sub3a;
		 addsub4a_comp	<= add_sub4a[9]? (-add_sub4a) : add_sub4a;
end

     // unsigned multiply: 9-bit magnitude by 7-bit coefficient magnitude
     assign p1a_all = addsub1a_comp * memory1a[6:0];
     assign p2a_all = addsub2a_comp * memory2a[6:0];
     assign p3a_all = addsub3a_comp * memory3a[6:0];
     assign p4a_all = addsub4a_comp * memory4a[6:0];


// restore the sign: negate the product when exactly one of (data sign, coefficient sign) is set
always @ (negedge clk)
      begin
        p1a <= (save_sign1a ^ memory1a[7]) ? (-p1a_all[16:0]) :(p1a_all[16:0]);
        p2a <= (save_sign2a ^ memory2a[7]) ? (-p2a_all[16:0]) :(p2a_all[16:0]);
        p3a <= (save_sign3a ^ memory3a[7]) ? (-p3a_all[16:0]) :(p3a_all[16:0]);
        p4a <= (save_sign4a ^ memory4a[7]) ? (-p4a_all[16:0]) :(p4a_all[16:0]);
      end

/* Final adder. Adding the outputs of the 4 multipliers */
// Two registered levels: pairwise sums (operands sign-extended to 18 bits), then the total.
always @ (negedge clk)
   begin
       z_out_int1 <= ({p1a[16],p1a} + {p2a[16],p2a});
       z_out_int2 <= ({p3a[16],p3a} + {p4a[16],p4a});
       z_out_int <= (z_out_int1 + z_out_int2);
   end

// rounding of the value: keep bits [17:8] and add 1 when the highest dropped bit (bit 7) is set
assign z_out_rnd = z_out_int[17:8];
assign z_out_prelatch = z_out_int[7] ? (z_out_rnd + 1'b1) : z_out_rnd;
//wire TEST_zout= z_out_int[17] ^z_out_int[16];

// outputs from output latches to cross clock edge boundary
// (one LD per output bit, gate = clk, data = the corresponding *_prelatch bit)
//wire[9:0] z_out;
//wire[6:0] wr_cntr;
//wire done;
//wire we;
//wire page;
  LD i_z_out9 (.Q(z_out[9]),.G(clk),.D(z_out_prelatch[9]));  
  LD i_z_out8 (.Q(z_out[8]),.G(clk),.D(z_out_prelatch[8]));  
  LD i_z_out7 (.Q(z_out[7]),.G(clk),.D(z_out_prelatch[7]));  
  LD i_z_out6 (.Q(z_out[6]),.G(clk),.D(z_out_prelatch[6]));  
  LD i_z_out5 (.Q(z_out[5]),.G(clk),.D(z_out_prelatch[5]));  
  LD i_z_out4 (.Q(z_out[4]),.G(clk),.D(z_out_prelatch[4]));  
  LD i_z_out3 (.Q(z_out[3]),.G(clk),.D(z_out_prelatch[3]));  
  LD i_z_out2 (.Q(z_out[2]),.G(clk),.D(z_out_prelatch[2]));  
  LD i_z_out1 (.Q(z_out[1]),.G(clk),.D(z_out_prelatch[1]));  
  LD i_z_out0 (.Q(z_out[0]),.G(clk),.D(z_out_prelatch[0]));  

  LD i_wr_cntr6 (.Q(wr_cntr[6]),.G(clk),.D(wr_cntr_prelatch[6]));  
  LD i_wr_cntr5 (.Q(wr_cntr[5]),.G(clk),.D(wr_cntr_prelatch[5]));  
  LD i_wr_cntr4 (.Q(wr_cntr[4]),.G(clk),.D(wr_cntr_prelatch[4]));  
  LD i_wr_cntr3 (.Q(wr_cntr[3]),.G(clk),.D(wr_cntr_prelatch[3]));  
  LD i_wr_cntr2 (.Q(wr_cntr[2]),.G(clk),.D(wr_cntr_prelatch[2]));  
  LD i_wr_cntr1 (.Q(wr_cntr[1]),.G(clk),.D(wr_cntr_prelatch[1]));  
  LD i_wr_cntr0 (.Q(wr_cntr[0]),.G(clk),.D(wr_cntr_prelatch[0]));  

  LD i_done     (.Q(done), .G(clk), .D(done_prelatch));  
  LD i_we       (.Q(we),   .G(clk), .D(we_prelatch));  
  LD i_page     (.Q(page), .G(clk), .D(page_prelatch));  

/* 1D-DCT END */
endmodule


// dct_stage2 - second 1-D pass of the 8x8 DCT.
//
// Reads 10-bit values from the transpose memory and runs them through the same
// kind of pipeline as stage 1 (shift chain -> snapshot -> add/subtract of
// mirrored pairs -> sign/magnitude multiply by the coefficient table -> sum of
// four products -> rounding), this time clocked on posedge clk and with wider
// intermediate registers.
//
// The read address swaps the two 3-bit fields of the internal counter
// (rd_cntr = {rd_page, rd_cntrs[2:0], rd_cntrs[5:3]}), so the memory is read
// in transposed order relative to a linear count.
//
// Change versus the original text: `disdv` was never declared and only existed
// as an implicit net created by the i_disdv port connection. It is now declared
// explicitly so the module also compiles under `default_nettype none and does
// not trigger implicit-net lint warnings. No logic was changed.
module dct_stage2 ( clk,
                    en,
						  start,	  // stage 1 finished, data available in transpose memory
						  page,	  // transpose memory page finished, valid at start
						  rd_cntr, // [6:0]	transpose memory read address
						  tdin,	  // [9:0] - data from transpose memory
						  endv,		// one cycle ahead of starting (continuing) dv
						  dv,		  // data output valid
						  dct2_out);// [11:0] output data
  input			clk;
  input			en,start,page;	
//  input	[7:0] tdin;
  input	[9:0] tdin;
  output	[6:0] rd_cntr;
  output [11:0] dct2_out;
  output dv;
  output endv;
 wire [11:0] dct2_out;
/* constants */

// Coefficient registers: bit 7 is a "negative" flag, bits 6:0 are the magnitude
// (the multipliers below use [6:0]; the sign is applied after the multiply).
reg[7:0] memory1a, memory2a, memory3a, memory4a;
reg [2:0] indexi;   // coefficient table index
reg dv;
/* 2D section */
//reg[7:0] xb0_in, xb1_in, xb2_in, xb3_in, xb4_in, xb5_in, xb6_in, xb7_in;
reg[9:0] xb0_in, xb1_in, xb2_in, xb3_in, xb4_in, xb5_in, xb6_in, xb7_in;          // input shift chain
reg[9:0] xb0_reg, xb1_reg, xb2_reg, xb3_reg, xb4_reg, xb5_reg, xb6_reg, xb7_reg;  // snapshot of the chain taken on sxregs
reg[9:0] addsub1b_comp,addsub2b_comp,addsub3b_comp,addsub4b_comp;                 // magnitude of add_sub*b
reg[10:0] add_sub1b,add_sub2b,add_sub3b,add_sub4b;                                // signed sum/difference of a mirrored pair
reg save_sign1b, save_sign2b, save_sign3b, save_sign4b;                           // sign bit of add_sub*b, delayed with the magnitude

reg[17:0] p1b,p2b,p3b,p4b;                    // signed products
wire[35:0] p1b_all,p2b_all,p3b_all,p4b_all;   // raw multiplier outputs
reg toggleB;                                  // 1 = add, 0 = subtract
reg[18:0] dct2d_int1,dct2d_int2;              // partial sums of two products each
reg[19:0] dct_2d_int;                         // sum of all four products
wire[11:0] dct_2d_rnd;                        // dct_2d_int with the 8 LSBs dropped

// transpose memory read address
  wire   [6:0] rd_cntr;
  reg    [5:0]	rd_cntrs;   // linear 0..63 counter, restarted by start
  reg          rd_page;    // page latched from the page input at start

// start with the same as stage1
//wire pre_sxregs;
wire endv;
wire   sxregs;             // "snapshot the shift chain" strobe
wire   disdv;              // delayed "read counter reached 6'h3f"; clears dv
// to conserve energy by disabling toggleB

wire	sxregs_d8;
reg	enable_toggle;
reg   en_started;          // set by start, cleared when en goes low

 SRL16 i_endv       (.Q(endv), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(start));	// dly=14+1
 SRL16 i_disdv      (.Q(disdv), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(rd_cntr[5:0]==6'h3f));	// dly=14+1


 SRL16 i_sxregs      (.Q(sxregs),    .A0(1'b0), .A1(1'b0), .A2(1'b0), .A3(1'b1), .CLK(clk),.D((rd_cntr[5:3]==3'h0) && en_started));	// dly=8+1
 SRL16 i_sxregs_d8   (.Q(sxregs_d8), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk),.D(sxregs && en_started));	// dly=7+1
  // enable_toggle is set by sxregs and stays set until sxregs_d8 arrives (or en goes low)
  always @ (posedge clk) enable_toggle <= en && (sxregs || (enable_toggle && !sxregs_d8));
  always @ (posedge clk) en_started <= en && (start || en_started);

  // dv is set by endv, cleared by disdv, and forced low while en is low
  always @ (posedge clk) dv <= en && (endv || (dv && ~disdv));

//  always @ (posedge clk) toggleB <= sxregs || (~toggleB);
  // toggleB is forced to 1 by sxregs, alternates while enable_toggle is high, and is 0 otherwise
  always @ (posedge clk) toggleB <= sxregs || (enable_toggle && (~toggleB));
  // indexi is loaded with 7 on sxregs and then increments (wrapping to 0) while enable_toggle is high
  always @ (posedge clk)
   if (sxregs) indexi <= 3'h7;
//	else indexi<=indexi+1;
	else if (enable_toggle) indexi<=indexi+1;
  // read counter: restarted (and page latched) by start, then counts up and stops at 6'h3f
  always @ (posedge clk) begin
    if (start) rd_page <= page;
    if (start) rd_cntrs[5:0] <=6'b0;	// will always count, but that does not matter- What about saving energy ;-) ? Saved...
    else if (rd_cntrs[5:0]!=6'h3f) rd_cntrs[5:0] <= rd_cntrs[5:0]+1;
//    else rd_cntrs[5:0] <= rd_cntrs[5:0]+1;
  end 
  // the two 3-bit counter fields are swapped to form the memory address
  assign	rd_cntr[6:0]= {rd_page,rd_cntrs[2:0],rd_cntrs[5:3]};

// duplicate memory<i>a from stage 1
// store 1D-DCT constant coefficient values for multipliers
// Encoding: {negative flag, 7-bit magnitude}. Negative entries are written as two
// partial assignments (bit 7 and bits 6:0); the trailing "//-8'dNN" remarks show
// the signed value each entry stands for.

always @ (posedge clk)
   begin
	     case (indexi)
         0 : begin memory1a <= 8'd91; 
                   memory2a <= 8'd91; 
                   memory3a <= 8'd91; 
                   memory4a <= 8'd91;end
         1 : begin memory1a <= 8'd126; 
                   memory2a <= 8'd106;  
                   memory3a <= 8'd71;  
                   memory4a <= 8'd25;end
         2 : begin memory1a <= 8'd118; 
                   memory2a <= 8'd49;  
                   memory3a[7] <= 1'b1; memory3a[6:0] <= 7'd49;//-8'd49; 
                   memory4a[7] <= 1'b1; memory4a[6:0] <= 7'd118;// end -8'd118;end
                   end
         3 : begin memory1a <= 8'd106; 
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd25;//-8'd25;  
                   memory3a[7] <= 1'b1; memory3a[6:0] <= 7'd126;//-8'd126; 
                   memory4a[7] <= 1'b1; memory4a[6:0] <= 7'd71;end//-8'd71;end
         4 : begin memory1a <= 8'd91; 
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd91;//-8'd91; 
                   memory3a[7] <= 1'b1; memory3a[6:0] <= 7'd91;//-8'd91; 
                   memory4a <= 8'd91;end
         5 : begin memory1a <= 8'd71; 
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd126;//-8'd126; 
                   memory3a <= 8'd25;   
                   memory4a <= 8'd106;end
         6 : begin memory1a <= 8'd49; 
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd118;//-8'd118; 
                   memory3a <= 8'd118;  
                   memory4a[7] <= 1'b1; memory4a[6:0] <= 7'd49;end//-8'd49;end
         7 : begin memory1a <= 8'd25;  
                   memory2a[7] <= 1'b1; memory2a[6:0] <= 7'd71;//-8'd71; 
                   memory3a <= 8'd106;  
                   memory4a[7] <= 1'b1; memory4a[6:0] <= 7'd126;end//-8'd126;end
       endcase
end



/* 10-bit transpose memory data shifted through an 8-deep register chain */
always @ (posedge clk)
   begin
       xb0_in <= tdin;   xb1_in <= xb0_in; xb2_in <= xb1_in; xb3_in <= xb2_in;
       xb4_in <= xb3_in; xb5_in <= xb4_in; xb6_in <= xb5_in; xb7_in <= xb6_in;
   end

/* register inputs: the chain is captured whenever sxregs is high */

always @ (posedge clk)
   if (sxregs) begin
       xb0_reg <= xb0_in; xb1_reg <= xb1_in; 
       xb2_reg <= xb2_in; xb3_reg <= xb3_in;
       xb4_reg <= xb4_in; xb5_reg <= xb5_in; 
       xb6_reg <= xb6_in; xb7_reg <= xb7_in;

   end



/* adder / subtractor block: operands are sign-extended from 10 to 11 bits */
always @ (posedge clk)
       if (toggleB == 1'b1) begin
           add_sub1b <= ({xb7_reg[9],xb7_reg[9:0]} + {xb0_reg[9],xb0_reg[9:0]});
			  add_sub2b <= ({xb6_reg[9],xb6_reg[9:0]} + {xb1_reg[9],xb1_reg[9:0]});
           add_sub3b <= ({xb5_reg[9],xb5_reg[9:0]} + {xb2_reg[9],xb2_reg[9:0]});
			  add_sub4b <= ({xb4_reg[9],xb4_reg[9:0]} + {xb3_reg[9],xb3_reg[9:0]});
       end else begin
           add_sub1b <= ({xb7_reg[9],xb7_reg[9:0]} - {xb0_reg[9],xb0_reg[9:0]});
			  add_sub2b <= ({xb6_reg[9],xb6_reg[9:0]} - {xb1_reg[9],xb1_reg[9:0]});
           add_sub3b <= ({xb5_reg[9],xb5_reg[9:0]} - {xb2_reg[9],xb2_reg[9:0]});
			  add_sub4b <= ({xb4_reg[9],xb4_reg[9:0]} - {xb3_reg[9],xb3_reg[9:0]});
       end


// Sign/magnitude split: the sign bit is saved and the value is negated when negative.
// NOTE(review): add_sub*b is 11 bits but addsub*b_comp is 10 bits, so the
// assignment drops bit 10; a magnitude of 1024 (add_sub == -1024) would be
// stored as 0. Presumably unreachable with the expected data range - confirm.
always @ (posedge clk) begin
		 save_sign1b	<= add_sub1b[10];
		 save_sign2b	<= add_sub2b[10];
		 save_sign3b	<= add_sub3b[10];
		 save_sign4b	<= add_sub4b[10];
		 addsub1b_comp	<= add_sub1b[10]? (-add_sub1b) : add_sub1b;
		 addsub2b_comp	<= add_sub2b[10]? (-add_sub2b) : add_sub2b;
		 addsub3b_comp	<= add_sub3b[10]? (-add_sub3b) : add_sub3b;
		 addsub4b_comp	<= add_sub4b[10]? (-add_sub4b) : add_sub4b;
end

/* multiply the outputs of the add/sub block with the 8 sets of stored coefficients */
     // unsigned multiply: 10-bit magnitude by 7-bit coefficient magnitude
     assign p1b_all = addsub1b_comp * memory1a[6:0];
     assign p2b_all = addsub2b_comp * memory2a[6:0];
     assign p3b_all = addsub3b_comp * memory3a[6:0];
     assign p4b_all = addsub4b_comp * memory4a[6:0];


// restore the sign: negate the product when exactly one of (data sign, coefficient sign) is set;
// the 36-bit expression is truncated to the 18-bit p*b registers on assignment
always @ (posedge clk)
  begin       
        p1b <= (save_sign1b ^ memory1a[7]) ? (-p1b_all) :(p1b_all);
        p2b <= (save_sign2b ^ memory2a[7]) ? (-p2b_all) :(p2b_all);
        p3b <= (save_sign3b ^ memory3a[7]) ? (-p3b_all) :(p3b_all);
        p4b <= (save_sign4b ^ memory4a[7]) ? (-p4b_all) :(p4b_all);
  end

/* Final adder. Adding the outputs of the 4 multipliers */
// Two registered levels, each with one bit of sign extension: pairwise sums, then the total.

always @ (posedge clk)
   begin
       dct2d_int1 <= ({p1b[17],p1b[17:0]} + {p2b[17],p2b[17:0]});
       dct2d_int2 <= ({p3b[17],p3b[17:0]} + {p4b[17],p4b[17:0]});
       dct_2d_int <= ({dct2d_int1[18],dct2d_int1[18:0]} + {dct2d_int2[18],dct2d_int2[18:0]});
   end

// rounding: keep bits [19:8] and add the highest dropped bit (bit 7)
assign dct_2d_rnd[11:0] = dct_2d_int[19:8];
assign dct2_out[11:0] = dct_2d_rnd[11:0] + dct_2d_int[7];


endmodule

Для просмотра полной версии этой страницы, пожалуйста, пройдите по ссылке.