head	1.10;
access;
symbols
	bg2_23:1.9
	bg2_22:1.9
	bg2_21:1.9
	bg2_20:1.9
	bg2_16:1.8
	bg2_15:1.8
	bg2_12:1.7
	bg2_07:1.7
	isorc2008_submission:1.4
	handbook_alpha_edition:1.4
	jtres2007_submission:1.4
	bg1_07:1.2
	bg1_06:1.2
	bg1_05:1.2
	TAL_101:1.2
	TAL_100:1.2
	jtres_submission:1.2
	wises06_submission:1.2
	lctes2006_submission:1.2
	rtgc_isorc2006:1.2.0.4
	isorc2006:1.2.0.2
	rtgc_paper:1.2;
locks; strict;
comment	@# @;


1.10
date	2008.08.21.16.06.23;	author 9914pich;	state Exp;
branches;
next	1.9;
commitid	2da448ad927b4567;

1.9
date	2008.07.21.12.18.40;	author martin;	state Exp;
branches;
next	1.8;
commitid	205348847e9c4567;

1.8
date	2008.05.30.13.07.14;	author 9914pich;	state Exp;
branches;
next	1.7;
commitid	1937483ffc014567;

1.7
date	2008.05.22.12.10.47;	author martin;	state Exp;
branches;
next	1.6;
commitid	115b483562c64567;

1.6
date	2008.02.23.23.18.46;	author martin;	state Exp;
branches;
next	1.5;
commitid	b7347c0a9b84567;

1.5
date	2008.02.20.14.29.32;	author martin;	state Exp;
branches;
next	1.4;
commitid	4d7c47bc39384567;

1.4
date	2007.06.01.13.05.32;	author 9914pich;	state Exp;
branches;
next	1.3;
commitid	64d24660199b4567;

1.3
date	2007.03.18.01.47.07;	author martin;	state Exp;
branches;
next	1.2;
commitid	32c145fc99f04567;

1.2
date	2005.12.02.20.48.54;	author martin;	state Exp;
branches;
next	1.1;
commitid	7ef34390b3304567;

1.1
date	2005.12.01.22.24.52;	author martin;	state Exp;
branches;
next	;
commitid	680438f78334567;


desc
@@


1.10
log
@fixed wait states are removed
@
text
@--
--
--  This file is a part of JOP, the Java Optimized Processor
--
--  Copyright (C) 2001-2008, Martin Schoeberl (martin@@jopdesign.com)
--
--  This program is free software: you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation, either version 3 of the License, or
--  (at your option) any later version.
--
--  This program is distributed in the hope that it will be useful,
--  but WITHOUT ANY WARRANTY; without even the implied warranty of
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--  GNU General Public License for more details.
--
--  You should have received a copy of the GNU General Public License
--  along with this program.  If not, see <http://www.gnu.org/licenses/>.
--


--
--	sc_sram32_flash.vhd
--
--	SimpCon compliant external memory interface
--	for 32-bit SRAM (e.g. Cyclone board)
--
--	Connection between mem_sc and the external memory bus
--
--	memory mapping
--	
--		0x000000-x7ffff	external SRAM (w mirror)	max. 512 kW (4*4 MBit)
--		0x080000-xfffff	external Flash (w mirror)	max. 512 kB (4 MBit)
--		0x100000-x1fffff	external NAND flash
--
--	RAM: 32 bit word
--	ROM: 8 bit word (for flash programming)
--
--	todo:
--		
--
--	2005-11-22	first version
--	2005-12-02	added flash interface
--	2008-05-22	nwe on pos edge, additional wait state for write

Library IEEE;
use IEEE.std_logic_1164.all;
use ieee.numeric_std.all;

use work.jop_types.all;
use work.sc_pack.all;

entity sc_mem_if is
generic (ram_ws : integer; rom_ws : integer);	-- wait states: SRAM / Flash+NAND

port (

	clk, reset	: in std_logic;

--	SimpCon memory interface:
--	sc_mem_out supplies address, wr_data, rd and wr,
--	sc_mem_in returns rd_data and rdy_cnt
	sc_mem_out		: in sc_out_type;
	sc_mem_in		: out sc_in_type;

-- external SRAM interface (ncs, noe, nwe are driven '0' when active)

	ram_addr	: out std_logic_vector(17 downto 0);
	ram_dout	: out std_logic_vector(31 downto 0);
	ram_din		: in std_logic_vector(31 downto 0);
	ram_dout_en	: out std_logic;
	ram_ncs		: out std_logic;
	ram_noe		: out std_logic;
	ram_nwe		: out std_logic;

--
--	config/program flash and big nand flash interface
--	fl_ncs selects the Flash, fl_ncsb the NAND; both share fl_a and fl_d
	fl_a	: out std_logic_vector(18 downto 0);
	fl_d	: inout std_logic_vector(7 downto 0);
	fl_ncs	: out std_logic;
	fl_ncsb	: out std_logic;
	fl_noe	: out std_logic;
	fl_nwe	: out std_logic;
	fl_rdy	: in std_logic

);
end sc_mem_if;

architecture rtl of sc_mem_if is

--
--	signals for mem interface
--	states: idl = idle, rd1/wr1 = wait state, rd2/wr2 = last state; fl_* same for Flash/NAND
	type state_type		is (
							idl, rd1, rd2, wr1, wr2,
							fl_rd1, fl_rd2, fl_wr1, fl_wr2
						);
	signal state 		: state_type;
	signal next_state	: state_type;

	signal wait_state	: unsigned(3 downto 0);	-- down counter, loaded on rd/wr
	signal cnt			: unsigned(1 downto 0);	-- drives sc_mem_in.rdy_cnt

	signal dout_ena		: std_logic;	-- drives ram_dout_en
	signal ram_data		: std_logic_vector(31 downto 0);	-- registered ram_din
	signal ram_data_ena	: std_logic;	-- load enable for ram_data

	signal flash_dout	: std_logic_vector(7 downto 0);	-- registered write data for fl_d
	signal fl_dout_ena	: std_logic;	-- enables the fl_d output driver
	signal flash_data	: std_logic_vector(7 downto 0);	-- registered fl_d
	signal flash_data_ena	: std_logic;	-- load enable for flash_data and nand_rdy
	signal nand_rdy		: std_logic;	-- registered fl_rdy, forced to '0' unless NAND is selected

	signal trans_ram	: std_logic;	-- address decodes to SRAM
	signal trans_flash	: std_logic;	-- address decodes to Flash
	-- selection for read mux
	signal ram_access	: std_logic;
	-- selection for Flash/NAND ncs
	signal sel_flash	: std_logic;
	
	signal ram_ws_wr	: integer;	-- ram_ws+1, wait states used for SRAM writes

begin

	ram_ws_wr <= ram_ws+1; -- one additional wait state for SRAM writes
	
	assert SC_ADDR_SIZE>=21 report "Too less address bits";	-- address bits 20..19 are decoded
	ram_dout_en <= dout_ena;

	sc_mem_in.rdy_cnt <= cnt;

--
--	decode ram/flash from address(20 downto 19): "00" RAM, "01" Flash, else neither
--	The signals are only valid for the first cycle; they are used when rd or wr is '1'
--
process(sc_mem_out.address(20 downto 19))
begin

	trans_ram <= '0';
	trans_flash <= '0';

	case sc_mem_out.address(20 downto 19) is
		when "00" =>
			trans_ram <= '1';
		when "01" =>
			trans_flash <= '1';
		when others =>
			null;
	end case;

end process;

--	Register memory address, write data and read data.
--	Address and write data are captured in the cycle where rd or wr is '1'.
--	NOTE(review): nand_rdy and flash_data get no value in the reset branch
process(clk, reset)
begin
	if reset='1' then

		ram_addr <= (others => '0');
		ram_dout <= (others => '0');
		ram_data <= (others => '0');
		flash_dout <= (others => '0');
		fl_a <= (others => '0');
		sel_flash <= '1';			-- AMD default (Flash selected through fl_ncs)
		ram_access <= '1';			-- RAM default

	elsif rising_edge(clk) then

		if sc_mem_out.rd='1' or sc_mem_out.wr='1' then
			if trans_ram='1' then
				ram_access <= '1';
				ram_addr <= sc_mem_out.address(17 downto 0);
			else
				ram_access <= '0';
				fl_a <= sc_mem_out.address(18 downto 0);
				-- select flash type (Flash or NAND) and keep it
				-- selected until the next non-RAM access
				if trans_flash='1' then
					sel_flash <= '1';
				else
					sel_flash <= '0';
				end if;
			end if;
		end if;
		if sc_mem_out.wr='1' then
			if trans_ram='1' then
				ram_dout <= sc_mem_out.wr_data;
			else
				flash_dout <= sc_mem_out.wr_data(7 downto 0);
			end if;
		end if;
		if ram_data_ena='1' then
			ram_data <= ram_din;
		end if;
		if flash_data_ena='1' then
			-- signal NAND rdy only for NAND access (sel_flash='0')
			nand_rdy <= fl_rdy and not sel_flash;
			flash_data <= fl_d;
		end if;

	end if;
end process;

--
--	MUX registered RAM and Flash data
--	Flash/NAND read word: bits 7..0 flash_data, bit 8 nand_rdy, upper 23 bits zero
process(ram_access, ram_data, flash_data, nand_rdy)

begin
	if (ram_access='1') then
		sc_mem_in.rd_data <= ram_data;
	else
		sc_mem_in.rd_data <= std_logic_vector(to_unsigned(0, 32-9)) & nand_rdy & flash_data;
	end if;
end process;

--
--	next state logic (combinational; default is to stay in the current state)
--
process(state, sc_mem_out, trans_ram, wait_state)

begin

	next_state <= state;

	case state is

		when idl =>
			if sc_mem_out.rd='1' then
				if trans_ram='1' then
					if ram_ws=0 then
						-- then we omit state rd1!
						next_state <= rd2;
					else
						next_state <= rd1;
					end if;
				else
					next_state <= fl_rd1;
				end if;
			elsif sc_mem_out.wr='1' then
				if trans_ram='1' then
					next_state <= wr1;
				else
					next_state <= fl_wr1;
				end if;
			end if;

		-- the WS state: stay here until the wait_state counter reaches 2
		when rd1 =>
			if wait_state=2 then
				next_state <= rd2;
			end if;

		-- last read state
		when rd2 =>
			next_state <= idl;
			-- A new rd/wr in this state starts the next SRAM
			-- access immediately (pipeline level of 2 for read).
			-- trans_ram is not checked here: we don't care
			-- about a flash trans. in the pipeline!
			if sc_mem_out.rd='1' then
				if ram_ws=0 then
					-- then we omit state rd1!
					next_state <= rd2;
				else
					next_state <= rd1;
				end if;
			elsif sc_mem_out.wr='1' then
				next_state <= wr1;
			end if;
			
		-- the WS state: stay here until the wait_state counter reaches 2
		when wr1 =>
			if wait_state=2 then
				next_state <= wr2;
			end if;

		-- last write state
		when wr2 =>
			next_state <= idl;

		when fl_rd1 =>
			if wait_state=2 then
				next_state <= fl_rd2;
			end if;

		when fl_rd2 =>
			next_state <= idl;
			-- we do no pipelining with the Flashes

		when fl_wr1 =>
			if wait_state=2 then
				next_state <= fl_wr2;
			end if;

		when fl_wr2 =>
			next_state <= idl;

	end case;
				
end process;

--
--	state machine register
--	output register (RAM, Flash control lines), decoded from next_state
--	so the registered outputs change in the same cycle as state
process(clk, reset)

begin
	if (reset='1') then
		state <= idl;
		dout_ena <= '0';
		ram_ncs <= '1';
		ram_noe <= '1';
		ram_data_ena <= '0';
		ram_nwe <= '1';

		fl_noe <= '1';
		fl_nwe <= '1';
		flash_data_ena <= '0';
		fl_dout_ena <= '0';

	elsif rising_edge(clk) then

		state <= next_state;
		dout_ena <= '0';
		ram_ncs <= '1';
		ram_noe <= '1';
		ram_data_ena <= '0';
		ram_nwe <= '1';

		fl_noe <= '1';
		fl_nwe <= '1';
		flash_data_ena <= '0';
		fl_dout_ena <= '0';

		case next_state is

			when idl =>
				-- nothing to do: the default values assigned above are kept
			-- the wait state: SRAM selected with output enabled
			when rd1 =>
				ram_ncs <= '0';
				ram_noe <= '0';

			-- last read state: additionally enable the ram_data register
			when rd2 =>
				ram_ncs <= '0';
				ram_noe <= '0';
				ram_data_ena <= '1';
				
				
			-- the WS state: nwe low, write data driven, SRAM selected
			when wr1 =>
				ram_nwe <= '0';
				dout_ena <= '1';
				ram_ncs <= '0';
				
			-- last write state: nwe back to '1', data and ncs still held
			when wr2 => 
				dout_ena <= '1';
				ram_ncs <= '0';

			when fl_rd1 =>
				fl_noe <= '0';

			when fl_rd2 =>
				fl_noe <= '0';
				flash_data_ena <= '1';

			when fl_wr1 =>
				fl_nwe <= '0';
				fl_dout_ena <= '1';

			when fl_wr2 =>
				fl_dout_ena <= '1';

		end case;
					
	end if;
end process;

--
-- wait_state processing: wait_state is loaded on rd/wr and decremented
-- on every other clock; cnt is the value reported as sc_mem_in.rdy_cnt
--
process(clk, reset)
begin
	if (reset='1') then
		wait_state <= (others => '1');
		cnt <= "00";
	elsif rising_edge(clk) then

		wait_state <= wait_state-1;

		cnt <= "11";	-- default, overridden by the assignments below
		if next_state=idl then
			cnt <= "00";
		-- same as: if wait_state<4 then
		elsif wait_state(3 downto 2)="00" then
			cnt <= wait_state(1 downto 0)-1;
		end if;

		if sc_mem_out.rd='1' then	-- new read: reload wait_state and cnt
			if trans_ram='1' then
				wait_state <= to_unsigned(ram_ws+1, 4);
				if ram_ws<3 then
					cnt <= to_unsigned(ram_ws+1, 2);
				else
					cnt <= "11";
				end if;
			else
				wait_state <= to_unsigned(rom_ws+1, 4);
				cnt <= "11";
			end if;
		end if;
		-- new write: SRAM uses ram_ws_wr, Flash/NAND uses rom_ws
		if sc_mem_out.wr='1' then
			if trans_ram='1' then
				wait_state <= to_unsigned(ram_ws_wr+1, 4);
				if ram_ws_wr<3 then
					cnt <= to_unsigned(ram_ws_wr+1, 2);
				else
					cnt <= "11";
				end if;
			else
				wait_state <= to_unsigned(rom_ws+1, 4);
				cnt <= "11";
			end if;
		end if;

	end if;
end process;

--
--	Flash signals
--

--
--	keep the last chip select active; it only toggles between the two flashes.
--
	fl_ncs <= not sel_flash;	-- Flash ncs
	fl_ncsb <= sel_flash;		-- NAND ncs

--
--	tristate output: fl_d is driven with flash_dout only while fl_dout_ena is '1'
--
process(fl_dout_ena, flash_dout)

begin
	if (fl_dout_ena='1') then
		fl_d <= flash_dout(7 downto 0);
	else
		fl_d <= (others => 'Z');
	end if;
end process;

end rtl;
@


1.9
log
@SRAM interface correction - longer data out enable
@
text
@d96 1
a96 1
							idl, rd1, rd2, wr1, wr2, wr3,
d276 5
a280 2
			next_state <= wr2;
		
a281 3
			next_state <= wr3;
			
		when wr3 =>
d361 1
a362 5
				ram_nwe <= '0';
				dout_ena <= '1';
				ram_ncs <= '0';
				
			when wr3 =>
@


1.8
log
@no message
@
text
@d357 2
d367 1
@


1.7
log
@nwe on posedge, write additional wait state
@
text
@d23 1
a23 1
--	sc_sram32_flash_wr_2ws.vhd
a101 1
	signal nwr_int		: std_logic;
@


1.6
log
@JOP goes GPL
@
text
@d23 1
a23 1
--	sc_sram32_flash.vhd
d44 1
a44 1
--
d96 1
a96 1
							idl, rd1, rd2, wr1,
d122 2
d127 2
a220 15
--	'delay' nwe 1/2 cycle -> change on falling edge
--
process(clk, reset)

begin
	if (reset='1') then
		ram_nwe <= '1';
	elsif falling_edge(clk) then
		ram_nwe <= nwr_int;
	end if;

end process;


--
d277 7
a283 7
-- TODO: check what happens on ram_ws=0
-- TODO: do we need a write pipelining?
--	not at the moment, but parhaps later when
--	we write the stack content to main memory
			if wait_state=1 then
				next_state <= idl;
			end if;
d319 1
d333 1
d359 3
d363 4
a387 14
--	nwr combinatorial processing
--	for the negativ edge
--
process(next_state, state)
begin

	nwr_int <= '1';
	if next_state=wr1 then
		nwr_int <= '0';
	end if;

end process;

--
d408 1
a408 1
		if sc_mem_out.rd='1' or sc_mem_out.wr='1' then
d421 14
@


1.5
log
@Cleanup of SimpCon types
@
text
@d2 21
@


1.4
log
@no message
@
text
@d42 1
a42 1
	sc_mem_out		: in sc_mem_out_type;
d104 1
a104 1
	assert MEM_ADDR_SIZE>=21 report "Too less address bits";
@


1.3
log
@VHDL restructure: add jopcpu + records for SimpCon
@
text
@d213 1
a213 1
process(state, sc_mem_out.rd, sc_mem_out.wr, trans_ram, wait_state)
@


1.2
log
@Added Flash interface (SimpCon) for the cycore board
@
text
@d30 1
d33 1
a33 1
generic (ram_ws : integer; rom_ws : integer; addr_bits : integer);
d39 5
a43 7
-- SimpCon interface

	address		: in std_logic_vector(addr_bits-1 downto 0);
	wr_data		: in std_logic_vector(31 downto 0);
	rd, wr		: in std_logic;
	rd_data		: out std_logic_vector(31 downto 0);
	rdy_cnt		: out unsigned(1 downto 0);
d104 1
a104 1
	assert addr_bits>=21 report "Too less address bits";
d107 1
a107 1
	rdy_cnt <= cnt;
d113 1
a113 1
process(address(20 downto 19))
d119 1
a119 1
	case address(20 downto 19) is
d147 1
a147 1
		if rd='1' or wr='1' then
d150 1
a150 1
				ram_addr <= address(17 downto 0);
d153 1
a153 1
				fl_a <= address(18 downto 0);
d163 1
a163 1
		if wr='1' then
d165 1
a165 1
				ram_dout <= wr_data;
d167 1
a167 1
				flash_dout <= wr_data(7 downto 0);
d189 1
a189 1
		rd_data <= ram_data;
d191 1
a191 1
		rd_data <= std_logic_vector(to_unsigned(0, 32-9)) & nand_rdy & flash_data;
d213 1
a213 1
process(state, rd, wr, trans_ram, wait_state)
d222 1
a222 1
			if rd='1' then
d233 1
a233 1
			elsif wr='1' then
d254 1
a254 1
			if rd='1' then
d261 1
a261 1
			elsif wr='1' then
d403 1
a403 1
		if rd='1' or wr='1' then
@


1.1
log
@SimpCon memory interface for Cycore board
@
text
@d11 3
a13 3
--		000000-x7ffff	external SRAM (w mirror)	max. 512 kW (4*4 MBit)
--		080000-xfffff	external Flash (w mirror)	max. 512 kB (4 MBit)
--		100000-xfffff	external NAND flash
a18 1
--		make a version with Flash interface
d22 1
d32 1
a32 1
generic (ram_ws : integer; rom_cnt : integer; addr_bits : integer);
d76 2
a77 2
							idl, rd1, rd2,
							wr1
d87 15
a101 1
	signal rd_data_ena	: std_logic;
d105 1
d111 21
d140 5
a144 1
		rd_data <= (others => '0');
d149 14
a162 1
			ram_addr <= address(17 downto 0);
d165 8
a172 1
			ram_dout <= wr_data;
d174 4
a177 2
		if rd_data_ena='1' then
			rd_data <= ram_din;
d184 13
a203 1
--		ram_noe <= '1';
a205 1
--		ram_noe <= noe_int;
d214 1
a214 1
process(state, rd, wr, wait_state)
a219 1

d224 7
a230 3
				if ram_ws=0 then
					-- then we omit state rd1!
					next_state <= rd2;
d232 1
a232 1
					next_state <= rd1;
d235 5
a239 1
				next_state <= wr1;
d252 3
a254 1
			-- level of 1 for read
d276 17
d299 1
a299 1
--	output register
d309 7
a315 1
		rd_data_ena <= '0';
d322 6
a327 1
		rd_data_ena <= '0';
d342 1
a342 1
				rd_data_ena <= '1';
d350 14
d404 8
a411 13
		if rd='1' then
			wait_state <= to_unsigned(ram_ws+1, 4);
			if ram_ws<3 then
				cnt <= to_unsigned(ram_ws+1, 2);
			else
				cnt <= "11";
			end if;
		elsif wr='1' then
			-- one more cycle for the write
			-- But in original mem32 this was only true
			-- for ram_cnt=2!
			if ram_ws<3 then
				cnt <= to_unsigned(ram_ws+1, 2);
d413 1
a415 7
			wait_state <= to_unsigned(ram_ws+1, 4);
--		else
--			-- do we need this?
--			-- we don't care about wait_state in state idle
--			if state=idl then
--				wait_state <= (others => '1');			-- keep it on max value
--			end if;
a417 1

d421 14
a434 1
-- TODO: move Flash interface to a second WB interface
d436 7
a442 7
	fl_a <= (others => '0');
	fl_d <= (others => 'Z');
	fl_ncs <= '1';
	fl_ncsb <= '1';
	fl_noe <= '1';
	fl_nwe <= '1';
--	fl_rdy	: in std_logic
@

