From 3a12592c5325404ccc2cdf64e0bc45999dbdbb93 Mon Sep 17 00:00:00 2001 From: marqs Date: Mon, 7 Oct 2019 01:25:33 +0300 Subject: [PATCH] fix linebuf read address timing bottleneck --- ossc.qsf | 2 +- rtl/ossc.v | 10 ++-- rtl/scanconverter.v | 122 ++++++++++++++++++++++---------------------- rtl/videogen.v | 8 +-- 4 files changed, 67 insertions(+), 75 deletions(-) diff --git a/ossc.qsf b/ossc.qsf index cc59c9f..533bb81 100644 --- a/ossc.qsf +++ b/ossc.qsf @@ -218,7 +218,7 @@ set_global_assignment -name ENABLE_SIGNALTAP OFF set_global_assignment -name USE_SIGNALTAP_FILE output_files/ossc_la.stp set_global_assignment -name FITTER_EFFORT "AUTO FIT" -set_global_assignment -name SEED 2 +set_global_assignment -name SEED 4 diff --git a/rtl/ossc.v b/rtl/ossc.v index c251f04..030d667 100644 --- a/rtl/ossc.v +++ b/rtl/ossc.v @@ -186,9 +186,9 @@ assign ypos = enable_sc ? ypos_sc : ypos_vg; assign HDMI_TX_PCLK = PCLK_out; always @(posedge PCLK_out) begin - HDMI_TX_RD <= enable_sc ? R_out_sc : R_out_vg; - HDMI_TX_GD <= enable_sc ? G_out_sc : G_out_vg; - HDMI_TX_BD <= enable_sc ? B_out_sc : B_out_vg; + HDMI_TX_RD <= osd_enable ? {8{osd_color}} : (enable_sc ? R_out_sc : R_out_vg); + HDMI_TX_GD <= osd_enable ? {8{osd_color}} : (enable_sc ? G_out_sc : G_out_vg); + HDMI_TX_BD <= osd_enable ? 8'hff : (enable_sc ? B_out_sc : B_out_vg); HDMI_TX_HS <= enable_sc ? HSYNC_out_sc : HSYNC_out_vg; HDMI_TX_VS <= enable_sc ? VSYNC_out_sc : VSYNC_out_vg; HDMI_TX_DE <= enable_sc ? 
DE_out_sc : DE_out_vg; @@ -293,8 +293,6 @@ scanconverter scanconverter_inst ( .vsync_flag (vsync_flag), .lt_active (lt_active), .lt_mode (lt_mode_synced), - .osd_enable (osd_enable), - .osd_color (osd_color), .xpos (xpos_sc), .ypos (ypos_sc), .pll_areset (pll_areset), @@ -334,8 +332,6 @@ videogen vg0 ( .reset_n (po_reset_n & ~enable_sc), .lt_active (lt_active), .lt_mode (lt_mode_synced), - .osd_enable (osd_enable), - .osd_color (osd_color), .R_out (R_out_vg), .G_out (G_out_vg), .B_out (B_out_vg), diff --git a/rtl/scanconverter.v b/rtl/scanconverter.v index 998328c..cfb87f8 100644 --- a/rtl/scanconverter.v +++ b/rtl/scanconverter.v @@ -62,8 +62,13 @@ `define HSYNC_LEADING_EDGE ((HSYNC_in_L == `HI) & (HSYNC_in == `LO)) `define VSYNC_LEADING_EDGE ((VSYNC_in_L == `HI) & (VSYNC_in == `LO)) +`define PP_PL_START 1 +`define PP_HS_VS_DE_START 2 +`define PP_ENABLES_START 2 +`define PP_RGB_START 4 + //`define PP_RLPF_PL_START_EARLY // set if start with 2 -`define PP_RLPF_PL_START 3 // minimum 2 +`define PP_RLPF_PL_START `PP_RGB_START // minimum 2 `define PP_RLPF_PL_LENGTH 3 // counted from aquisition `define PP_SLGEN_PL_LENGTH 5 `define PP_LT_BORDER_GEN_LENGTH 1 // lt_box / border_mask gen @@ -106,8 +111,6 @@ module scanconverter ( output vsync_flag, input lt_active, input [1:0] lt_mode, - input osd_enable, - input osd_color, output reg [10:0] xpos, output reg [10:0] ypos, input pll_areset, @@ -127,7 +130,7 @@ wire pll_lock; //RGB signals&registers: 8 bits per component -> 16.7M colors wire [7:0] R_act, G_act, B_act; wire [7:0] R_lbuf, G_lbuf, B_lbuf; -reg [7:0] R_in_L, G_in_L, B_in_L, R_in_LL, G_in_LL, B_in_LL, R_1x, G_1x, B_1x; +reg [7:0] R_in_L, G_in_L, B_in_L, R_in_LL, G_in_LL, B_in_LL, R_in_LLL, G_in_LLL, B_in_LLL, R_1x, G_1x, B_1x; //H+V syncs + data enable signals&registers wire HSYNC_act, VSYNC_act, DE_act; @@ -141,7 +144,8 @@ reg FID_cur, FID_last, FID_prev, FID_1x; reg frame_change, frame_change_longpulse, line_change; //H+V counters -wire [11:0] linebuf_hoffset; //Offset 
for line (max. 2047 pixels), MSB indicates which line is read/written +reg [11:0] linebuf_hoffset_pp; //Offset for line (max. 2047 pixels), MSB indicates which line is read/written +wire [11:0] linebuf_hoffset_act; wire [11:0] hcnt_act; reg [11:0] hcnt_1x, hcnt_2x, hcnt_3x, hcnt_4x, hcnt_5x, hcnt_4x_aspfix, hcnt_2x_opt, hcnt_3x_opt, hcnt_4x_opt, hcnt_5x_opt, hcnt_5x_hscomp; reg [2:0] hcnt_2x_opt_ctr, hcnt_3x_opt_ctr, hcnt_4x_opt_ctr, hcnt_5x_opt_ctr; @@ -368,7 +372,7 @@ case (V_MULTMODE) hcnt_act = hcnt_1x; vcnt_act = vcnt_1x; pclk_mux_sel = `PCLK_MUX_1X; - linebuf_hoffset = 0; + linebuf_hoffset_act = 0; col_id_act = {2'b00, hcnt_1x[0]}; rlpf_trigger_act = 1'b1; end @@ -385,19 +389,19 @@ case (V_MULTMODE) case (H_MULTMODE) default: begin //`H_MULTMODE_FULLWIDTH pclk_mux_sel = `PCLK_MUX_2X; - linebuf_hoffset = hcnt_2x; + linebuf_hoffset_act = hcnt_2x; col_id_act = {2'b00, hcnt_2x[0]}; rlpf_trigger_act = 1'b1; end `H_MULTMODE_OPTIMIZED_1X: begin pclk_mux_sel = `PCLK_MUX_1X; //special case: pclk bypass to enable 2x native sampling - linebuf_hoffset = hcnt_2x_opt; + linebuf_hoffset_act = hcnt_2x_opt; col_id_act = {2'b00, hcnt_2x[1]}; rlpf_trigger_act = (hcnt_2x_opt_ctr == 0); end `H_MULTMODE_OPTIMIZED: begin pclk_mux_sel = `PCLK_MUX_2X; - linebuf_hoffset = hcnt_2x_opt; + linebuf_hoffset_act = hcnt_2x_opt; col_id_act = hcnt_2x_opt_ctr; rlpf_trigger_act = (hcnt_2x_opt_ctr == 0); end @@ -415,21 +419,21 @@ case (V_MULTMODE) case (H_MULTMODE) default: begin //`H_MULTMODE_FULLWIDTH pclk_mux_sel = `PCLK_MUX_3X; - linebuf_hoffset = hcnt_3x; + linebuf_hoffset_act = hcnt_3x; hcnt_act = hcnt_3x; col_id_act = {2'b00, hcnt_3x[0]}; rlpf_trigger_act = 1'b1; end `H_MULTMODE_ASPECTFIX: begin pclk_mux_sel = `PCLK_MUX_4X; - linebuf_hoffset = hcnt_4x_aspfix; + linebuf_hoffset_act = hcnt_4x_aspfix; hcnt_act = hcnt_4x_aspfix; col_id_act = {2'b00, hcnt_4x[0]}; rlpf_trigger_act = 1'b1; end `H_MULTMODE_OPTIMIZED: begin pclk_mux_sel = `PCLK_MUX_3X; - linebuf_hoffset = hcnt_3x_opt; + 
linebuf_hoffset_act = hcnt_3x_opt; hcnt_act = hcnt_3x; col_id_act = hcnt_3x_opt_ctr; rlpf_trigger_act = (hcnt_3x_opt_ctr == 0); @@ -449,12 +453,12 @@ case (V_MULTMODE) pclk_mux_sel = `PCLK_MUX_4X; case (H_MULTMODE) default: begin //`H_MULTMODE_FULLWIDTH - linebuf_hoffset = hcnt_4x; + linebuf_hoffset_act = hcnt_4x; col_id_act = {2'b00, hcnt_4x[0]}; rlpf_trigger_act = 1'b1; end `H_MULTMODE_OPTIMIZED: begin - linebuf_hoffset = hcnt_4x_opt; + linebuf_hoffset_act = hcnt_4x_opt; col_id_act = hcnt_4x_opt_ctr; rlpf_trigger_act = (hcnt_4x_opt_ctr == 0); end @@ -473,12 +477,12 @@ case (V_MULTMODE) pclk_mux_sel = `PCLK_MUX_5X; case (H_MULTMODE) default: begin //`H_MULTMODE_FULLWIDTH - linebuf_hoffset = hcnt_5x_hscomp; + linebuf_hoffset_act = hcnt_5x_hscomp; col_id_act = {2'b00, hcnt_5x[0]}; rlpf_trigger_act = 1'b1; end `H_MULTMODE_OPTIMIZED: begin - linebuf_hoffset = hcnt_5x_opt; + linebuf_hoffset_act = hcnt_5x_opt; col_id_act = hcnt_5x_opt_ctr; rlpf_trigger_act = (hcnt_5x_opt_ctr == 0); end @@ -522,7 +526,7 @@ defparam clkctrl1.lpm_type = "cycloneive_clkctrl"; -wire [11:0] linebuf_rdaddr = linebuf_hoffset-H_AVIDSTART; +wire [11:0] linebuf_rdaddr = linebuf_hoffset_pp-H_AVIDSTART; wire [11:0] linebuf_wraddr = hcnt_1x-H_AVIDSTART; //TODO: add secondary buffers for interlaced signals with alternative field order @@ -557,44 +561,43 @@ linebuf linebuf_rgb ( integer pp_idx; always @(posedge pclk_act) begin - line_id_pp[1] <= SL_ALTIV ? {2'b00, vcnt_act[0]} : line_id_act; - col_id_pp[1] <= col_id_act; - for(pp_idx = 2; pp_idx <= `PP_SLGEN_PL_END-5; pp_idx = pp_idx+1) begin + line_id_pp[`PP_PL_START] <= SL_ALTIV ? 
{2'b00, vcnt_act[0]} : line_id_act; + col_id_pp[`PP_PL_START] <= col_id_act; + for(pp_idx = `PP_PL_START+1; pp_idx <= `PP_SLGEN_PL_END-5; pp_idx = pp_idx+1) begin line_id_pp[pp_idx] <= line_id_pp[pp_idx-1]; col_id_pp[pp_idx] <= col_id_pp[pp_idx-1]; end hcnt_pp <= hcnt_act; vcnt_pp <= vcnt_act; + linebuf_hoffset_pp <= linebuf_hoffset_act; xpos <= hcnt_pp - H_AVIDSTART; ypos <= vcnt_pp - V_AVIDSTART; - border_enable_pp[2] <= ((hcnt_pp < H_AVIDMASK_START) | (hcnt_pp >= H_AVIDMASK_STOP) | (vcnt_pp < V_AVIDMASK_START) | (vcnt_pp >= V_AVIDMASK_STOP)); - for(pp_idx = 3; pp_idx <= `PP_PIPELINE_LENGTH; pp_idx = pp_idx+1) begin + + border_enable_pp[`PP_ENABLES_START] <= ((hcnt_pp < H_AVIDMASK_START) | (hcnt_pp >= H_AVIDMASK_STOP) | (vcnt_pp < V_AVIDMASK_START) | (vcnt_pp >= V_AVIDMASK_STOP)); + case (lt_mode) + default: begin + lt_box_enable_pp[`PP_ENABLES_START] <= 0; + end + `LT_POS_TOPLEFT: begin + lt_box_enable_pp[`PP_ENABLES_START] <= ((hcnt_pp < LT_POS_TOPLEFT_BOX_H_STOP) && (vcnt_pp < LT_POS_TOPLEFT_BOX_V_STOP)) ? 1'b1 : 1'b0; + end + `LT_POS_CENTER: begin + lt_box_enable_pp[`PP_ENABLES_START] <= ((hcnt_pp >= LT_POS_CENTER_BOX_H_START) && (hcnt_pp < LT_POS_CENTER_BOX_H_STOP) && (vcnt_pp >= LT_POS_CENTER_BOX_V_START) && (vcnt_pp < LT_POS_CENTER_BOX_V_STOP)) ? 1'b1 : 1'b0; + end + `LT_POS_BOTTOMRIGHT: begin + lt_box_enable_pp[`PP_ENABLES_START] <= ((hcnt_pp >= LT_POS_BOTTOMRIGHT_H_START) && (vcnt_pp >= LT_POS_BOTTOMRIGHT_V_START)) ? 1'b1 : 1'b0; + end + endcase + for(pp_idx = `PP_ENABLES_START+1; pp_idx <= `PP_PIPELINE_LENGTH; pp_idx = pp_idx+1) begin + lt_box_enable_pp[pp_idx] <= lt_box_enable_pp[pp_idx-1]; border_enable_pp[pp_idx] <= border_enable_pp[pp_idx-1]; end - case (lt_mode) - default: begin - lt_box_enable_pp[2] <= 0; - end - `LT_POS_TOPLEFT: begin - lt_box_enable_pp[2] <= ((hcnt_pp < LT_POS_TOPLEFT_BOX_H_STOP) && (vcnt_pp < LT_POS_TOPLEFT_BOX_V_STOP)) ? 
1'b1 : 1'b0; - end - `LT_POS_CENTER: begin - lt_box_enable_pp[2] <= ((hcnt_pp >= LT_POS_CENTER_BOX_H_START) && (hcnt_pp < LT_POS_CENTER_BOX_H_STOP) && (vcnt_pp >= LT_POS_CENTER_BOX_V_START) && (vcnt_pp < LT_POS_CENTER_BOX_V_STOP)) ? 1'b1 : 1'b0; - end - `LT_POS_BOTTOMRIGHT: begin - lt_box_enable_pp[2] <= ((hcnt_pp >= LT_POS_BOTTOMRIGHT_H_START) && (vcnt_pp >= LT_POS_BOTTOMRIGHT_V_START)) ? 1'b1 : 1'b0; - end - endcase - for(pp_idx = 3; pp_idx <= `PP_PIPELINE_LENGTH; pp_idx = pp_idx+1) begin - lt_box_enable_pp[pp_idx] <= lt_box_enable_pp[pp_idx-1]; - end - - HSYNC_pp[2] <= HSYNC_act; - VSYNC_pp[2] <= VSYNC_act; - DE_pp[2] <= DE_act; - for(pp_idx = 3; pp_idx <= `PP_PIPELINE_LENGTH; pp_idx = pp_idx+1) begin + HSYNC_pp[`PP_HS_VS_DE_START] <= HSYNC_act; + VSYNC_pp[`PP_HS_VS_DE_START] <= VSYNC_act; + DE_pp[`PP_HS_VS_DE_START] <= DE_act; + for(pp_idx = `PP_HS_VS_DE_START+1; pp_idx <= `PP_PIPELINE_LENGTH; pp_idx = pp_idx+1) begin HSYNC_pp[pp_idx] <= HSYNC_pp[pp_idx-1]; VSYNC_pp[pp_idx] <= VSYNC_pp[pp_idx-1]; DE_pp[pp_idx] <= DE_pp[pp_idx-1]; @@ -604,10 +607,10 @@ begin DE_out <= DE_pp[`PP_PIPELINE_LENGTH]; // get RGB and delay it - R_pp[3] <= R_act; - G_pp[3] <= G_act; - B_pp[3] <= B_act; - for(pp_idx = 4; pp_idx <= `PP_PIPELINE_LENGTH; pp_idx = pp_idx + 1) begin + R_pp[`PP_RGB_START] <= R_act; + G_pp[`PP_RGB_START] <= G_act; + B_pp[`PP_RGB_START] <= B_act; + for(pp_idx = `PP_RGB_START+1; pp_idx <= `PP_PIPELINE_LENGTH; pp_idx = pp_idx + 1) begin R_pp[pp_idx] <= R_pp[pp_idx-1]; G_pp[pp_idx] <= G_pp[pp_idx-1]; B_pp[pp_idx] <= B_pp[pp_idx-1]; @@ -617,9 +620,9 @@ begin B_out <= B_pp[`PP_PIPELINE_LENGTH]; // reverse LPF ... 
- rlpf_trigger_r[1] <= rlpf_trigger_act; - for(pp_idx = 2; pp_idx <= `PP_RLPF_PL_START-1; pp_idx = pp_idx + 1) - rlpf_trigger_r[`PP_RLPF_PL_START-1] <= rlpf_trigger_r[1]; + rlpf_trigger_r[`PP_PL_START] <= rlpf_trigger_act; + for(pp_idx = `PP_PL_START+1; pp_idx <= `PP_RLPF_PL_START-1; pp_idx = pp_idx + 1) + rlpf_trigger_r[pp_idx] <= rlpf_trigger_r[pp_idx-1]; // Optimized modes repeat pixels. Save previous pixel only when linebuffer offset changes. if (rlpf_trigger_r[`PP_RLPF_PL_START-1]) begin @@ -708,11 +711,7 @@ begin end // apply LT box / mask - if (osd_enable) begin - R_out <= {8{osd_color}}; - G_out <= {8{osd_color}}; - B_out <= 8'hff; - end else if (lt_active) begin + if (lt_active) begin R_out <= {8{lt_box_enable_pp[`PP_PIPELINE_LENGTH]}}; G_out <= {8{lt_box_enable_pp[`PP_PIPELINE_LENGTH]}}; B_out <= {8{lt_box_enable_pp[`PP_PIPELINE_LENGTH]}}; @@ -976,14 +975,17 @@ begin HSYNC_in_L <= HSYNC_in; VSYNC_in_L <= VSYNC_in; - // Add one delay stage to match linebuf delay + // Add two delay stages to match linebuf delay R_in_LL <= R_in_L; G_in_LL <= G_in_L; B_in_LL <= B_in_L; + R_in_LLL <= R_in_LL; + G_in_LLL <= G_in_LL; + B_in_LLL <= B_in_LL; - R_1x <= R_in_LL; - G_1x <= G_in_LL; - B_1x <= B_in_LL; + R_1x <= R_in_LLL; + G_1x <= G_in_LLL; + B_1x <= B_in_LLL; HSYNC_1x <= (hcnt_1x < H_SYNCLEN) ? `HSYNC_POL : ~`HSYNC_POL; if (FID_cur == `FID_EVEN) VSYNC_1x <= (vcnt_1x < V_SYNCLEN) ? 
`VSYNC_POL : ~`VSYNC_POL; diff --git a/rtl/videogen.v b/rtl/videogen.v index 1568e73..82f3547 100644 --- a/rtl/videogen.v +++ b/rtl/videogen.v @@ -24,8 +24,6 @@ module videogen ( input reset_n, input lt_active, input [1:0] lt_mode, - input osd_enable, - input osd_color, output reg [7:0] R_out, output reg [7:0] G_out, output reg [7:0] B_out, @@ -123,11 +121,7 @@ begin B_out <= 8'h00; DE_out <= 1'b0; end else begin - if (osd_enable) begin - R_out <= {8{osd_color}}; - G_out <= {8{osd_color}}; - B_out <= 8'hff; - end else if (lt_active) begin + if (lt_active) begin case (lt_mode) default: begin {R_out, G_out, B_out} <= {3{8'h00}};