	page	240, 132
;NATIVEPX.ASM	31-Oct-2011
;
;	**********************************************************
;	**							**
;	**     32-BIT XPL0 NATIVE LANGUAGE RUNTIME PACKAGE      **
;	**   FOR 80386 MACHINES WITH A 80387 MATH COPROCESSOR   **
;	**                     (OR BETTER)                      **
;	**							**
;	**             COPYRIGHT LOREN BLANEY 2011              **
;	**    BASED ON CODE BY LARRY FISH AND P.J.R. BOYLE      **
;	**           CONTRIBUTIONS BY RICHARD OTTOSEN           **
;	**      PMODE MODULE BY TRAN (A.K.A: THOMAS PYTEL)      **
;	**							**
;	**********************************************************
;
;Assemble with MASM version 6.11 (or later). (TASM 3.1 can't handle this.)
;
;This program is free software; you can redistribute it and/or modify it under
; the terms of the GNU General Public License version 2 as published by the
; Free Software Foundation.
;This program is distributed in the hope that it will be useful, but WITHOUT
; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
; details.
;You should have received a copy of the GNU General Public License along with
; this program (in the file LICENSE.TXT); if not, write to the Free Software
; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
;
;You can reach the authors at: loren.blaney@gmail.com
;
;REVISIONS:
;V3.0x, JUL-20-99, Modified to run with PMODE.ASM. Fixed Clear intrinsic for
; mode 6A. Added intrinsics IRQ (80) and Ranseed (79). Fixed bug in Finp
; routine when exponent is > 255. Fix bug in Conout that returns al = CR after
; outputting FF.
;V3.0, JUL-28-99, Fix Resize routine so that Malloc can be used.
;V3.01, APR-23-00, Change runtime error messages.
;V3.02, 10-FEB-2001, Fixed bug in Rlout where ":" could be displayed instead
; of "9". Fixed bug where runtime error number could be output to some device
; other than 0.
;V3.10, 9-JUN-2003, Modified for XPLPX: Preserve esi & edi registers, condi-
; tional assembly makes edi the heap pointer (HP), making the "X" version of
; NATIVEP.ASM. Fixed bug in Hilite intrinsic (clear high word of ecx). Initial-
; ize device 6 window to actual screen size (not necessarily 80x25). Add Irq
; intrinsic (=80). Optimize declared arrays (mkarray). Fix rounding error in
; RlOut.
;14-Jul-2004, Fixed SetWind intrinsic (70) and device 6 to have correct
; background fill and scroll color when in a graphic video mode. Point
; intrinsic is 5 times faster for VGA planar modes (0Dh-12h). Compensate
; for nVidia bug that fills graphic screen with vertical bars.
;16-Jan-2005, Modify mkarray to fix global array declarations in a split heap.
; Clear intrinsic for VESA. Modify Peek, Poke and Blit to work with Windows XP
; (which doesn't allow negative offsets from DS to access the PSP, etc.). Fix
; Malloc, Release, TrapC and TestC intrinsics. Streamline Sin, Cos & Tan
; intrinsics. Cut .exe file sizes in half by moving buffers to natbuf.asm.
; Blit moves 4 bytes at a time. Added Paint intrinsic (81). Use all available
; memory for heap. Fast ReadPix intrinsic works for VESA and VGA (but is slow
; for planar modes). Alternate Text intrinsic for null-terminated strings.
; Device 6 handles the display of characters when in VESA graphic modes. Round
; mkarray and Reserves up to next dword boundary.
;2-Mar-2005, Reenable interrupts after a DOS call.
;2-Jun-2006, Fix masking bug in ReadPix intrinsic for 3 bytes per pixel. Clear
; intrinsic sets text cursor position to 0,0 for VESA modes. If a runtime error
; occurs while in a graphic mode then text mode is set (so the error message can
; be seen if it was a VESA graphic mode).
;16-Jul-2006, Fix bug in SoftInt intrinsic: wrong status flags were returned.
; Fix similar bug in Chain intrinsic that returned wrong status for carry flag.
; Fix bug in Point intrinsic when plotting 24-bit color on some computers. The
; Line intrinsic works for more than 8 bits of color (in which case dashed lines
; are defined by bits 31..24 instead of 15..8). Allow VESA modes $180..$1FF.
;2-Sep-2006, Initialize all data locations, i.e. replace '?' with '0'.
;9-Sep-2006, Add clipping to Point intrinsic for VESA graphic modes.
;25-Feb-2007, Fixed a bug where a program had more than 64K of constant data
; (such as with constant arrays or strings) and did file I/O or called SetVid.
; Buffers in natbuf.asm would overwrite the constant data. Removed obsolete
; "multitask" conditional-assembly option for less clutter. WARNING: Do not use
; /m or /l with TASM 3.1 or 4.1. They do not work properly when dseg is > 64K.
; Fix background colors for mode 13h on nVidia card.
; Report address where runtime errors occur.
; Zero the global heap area too.
;17-Mar-2007, Fix readPix intrinsic for ATI cards (which read from window B).
; Disable memory zeroing stuff and runtime error addresses (marked "debug").
;26-Apr-2007, VESA mode text (on device 6) can be positioned to any pixel using
; the Move intrinsic (it's no longer limited to character cell boundaries).
; Device 6 displays an 8x16 font and device $106 displays an 8x8 font. Attrib
; intrinsic for 16 (and 15) bit graphic modes uses high word for background and
; low word for foreground colors. 24-bit graphics always has a black background.
; SetVid intrinsic sets default attribute to white on black.
;28-Mar-2008, Add Read and Write intrinsics; ignore underlines in IntIn, HexIn
; and RlIn; add GetTime and Backup intrinsics. Return run-time errors to DOS.
;16-Apr-2008, Fix incompatibility between SetVid intrinsic and Matrox BIOS.
;21-Apr-2009, Enable device 6's font in VESA graphic modes to be changed by
; redirecting interrupt 43h (BIOS int 10h, func 11h, sub-func 21h).
;28-Jun-2009, Clear intrinsic sets VESA's text cursor to 0,0.
;10-Jan-2010, Fix Restart intrinsic for Windows XP.
;17-Jul-2011, Fix SetVid intrinsic, which set more text lines than would fit on
; a screen.
;31-Oct-2011, Replace random number generator with a much better one.
;
;CODING CONVENTIONS:
;Subroutines do not destroy the contents of any registers. If a register is
; changed, this is stated as part of the output of the subroutine.
;
;Intrinsics assume that "the top of stack" (tos) is in eax, with any additional
; arguments on the actual hardware stack. Intrinsics used as functions return
; their value in eax. Intrinsics may use eax and edx as scratch registers, but
; all other registers must be unchanged.
;Intrinsics that use real numbers use st(0) in the math coprocessor to hold TOS.
;The fs register always points to segment 0, to allow access to BIOS variables.
;
;The following is used to conditionally assemble code for the optimized version
; of the compiler, XPLPX. It causes edi to be used to hold the heap pointer (hp)
; NATIVEP.OBJ should be renamed NATIVEPX.OBJ.
flagopt	equ	1			;set true=1 or false=0
;
;The following is used to enable the math coprocessor. If mathcop is false, 
; NATIVEP.OBJ is renamed NATIVEPI.OBJ (Integer-only).
mathcop	equ	1			;1 = math coprocessor present

	.386p
	.387

	extrn	progrm:near		;entry point to compiled XPL code
	extrn	heaplo:dword
	extrn	dinbuf:byte, dotbuf:byte, psubuf:byte, modeBlk:byte

;The compiled XPL code needs these:
	public	stkptr, hp, remain, fptemp
	public	base0,  base1,  base2,  base3,  base4,  base5,  base6,  base7
	public	intr0,  intr1,  intr2,  intr3,  intr4,  intr5,  intr6,  intr7
	public	intr8,  intr9,  intr10, intr11, intr12, intr13, intr14, intr15
	public	intr16, intr17, intr18, intr19, intr20, intr21, intr22, intr23
	public	intr24, intr25, intr26, intr27, intr28, intr29, intr30, intr31
	public	intr32, intr33, intr34, intr35, intr36, intr37, intr38, intr39
	public	intr40, intr41, intr42, intr43, intr44, intr45, intr46, intr47
	public	intr48, intr49, intr50, intr51, intr52, intr53, intr54, intr55
	public	intr56, intr57, intr58, intr59, intr60, intr61, intr62, intr63
	public	intr64, intr65, intr66, intr67, intr68, intr69, intr70, intr71
	public	intr72, intr73, intr74, intr75, intr76, intr77, intr78, intr79
	public	intr80, intr81, intr82, intr83, intr12a
	public	mkarray

;-------------------------------------------------------------------------------

stacklen equ	4000h 			;size of stack in bytes
					;BEWARE OF TASM BUG IF STACKLEN >=10000h
PMODE_TEXT	segment para public use16 'code'
PMODE_TEXT	ends
cseg		segment para public use32 'code'
cseg		ends
dseg		segment para public use32 'data'
dseg		ends
exe_stack	segment para stack use16 'stack'
exe_stack	ends

extrn	_pm_info:far, _pm_init:far	;in PMODE.ASM

PMODE_TEXT	segment para public use16 'code'
		assume	cs:PMODE_TEXT, ds:PMODE_TEXT
		align 2
errmsgtbl	dw	errmsg0, errmsg1, errmsg2, errmsg3
		dw	errmsg4, errmsg5, errmsg6

errmsg0		db	'Not enough low memory', 13, 10, 36
errmsg1		db	'80386 (or better) not detected', 13, 10, 36
errmsg2		db	'System already in protected mode and no VCPI or DPMI'
		db	' found', 13, 10, 36
errmsg3		db	'DPMI host is not 32-bit', 13, 10, 36
errmsg4		db	'Could not enable A20 gate', 13, 10, 36
errmsg5		db	'Could not enter DPMI 32-bit protected mode', 13, 10, 36
errmsg6		db	'Could not allocate needed DPMI selectors', 13, 10, 36

;-------------------------------------------------------------------------------
;Table of registers used for intrinsics SOFTINT (34) and GETREG (35). DOS calls
; only use the low word. The high word forms a 32-bit value for XPL code.
;
dseg	segment	para public use32 'data'
	align	4
cpureg	label	word			;define the register addresses
axreg	dw	0, 0			;0 ax register
bxreg	dw	0, 0			;1 bx register
cxreg	dw	0, 0			;2 cx register
dxreg	dw	0, 0			;3 dx register
direg	dw	0, 0			;4 di register
sireg	dw	0, 0			;5 si register
bpreg	dw	0, 0			;6 bp register
cflag	dd	0			;7 carry flag

csreg	dw	0, 0			;8 cs register
dsreg	dw	0, 0			;9 ds register
ssreg	dw	0, 0			;10 ss register
esreg	dw	0, 0			;11 es register

dsgmnt	dw	0, 0			;12 holds current data segment

dosint	db	0, 0, 0, 0		;13 last called DOS interrupt
dosfun	db	0, 0, 0, 0		;14 last called DOS function
dosret	dw	0, 0			;15 last DOS error code
flagreg	dw	0, 0			;16 flag register
dseg	ends

;===============================================================================
;				ENTRY POINT
;===============================================================================
;When DOS starts this (XPL) program ds = es and they point to the Program
; Segment Prefix (PSP). ss = exe_stack, sp = stacklen-2.
;
;The registers are saved so the XPL program can access them using the GETREG 
; intrinsic (35), or in case the XPL program is used as an interrupt routine.
;
start:	push	ds			;save old data segment
	push	ax			;save ax
	mov	ax, dseg		;point ds to our data
	mov	ds, ax
	assume	ds:dseg
	pop	word ptr axreg		;save all registers
	pop	word ptr dsreg
	mov	bxreg, bx
	mov	cxreg, cx
	mov	dxreg, dx
	mov	sireg, si
	mov	direg, di
	mov	bpreg, bp
;	mov	entstk, sp		;save entry stack pointer
	mov	csreg, cs
	mov	ssreg, ss
	mov	esreg, es
	mov	pspseg, es
	pushf				;save status flags
	pop	word ptr flagreg

	push	cs 			;ds:= cs
	pop	ds
	assume	ds:PMODE_TEXT

	call	_pm_info		;get pmode information:
					; ax = error code
					; bx = paragraphs needed by pmode module
					; (cl = processor type; ch = pmode type)
	jnc	short start2		;jump if no error

start1:	mov	si, ax			;print error message for code in ax
	add	si, ax			;double for word entries
	mov	dx, errmsgtbl[si]	;get message from table
	mov	ah, 09h			;print it
	int	21h
	mov	ax, 4CFFh		;exit back to DOS with error code = 0FFh
	int	21h
start2:
	mov	cx, ss			;cx = base of pmode usage (segment addr)
	add	cx, stacklen/16		;BEWARE OF TASM BUG IF STACKLEN >=10000h
	mov	dx, cx			;get base of pmode usage
	add	dx, bx			;plus amount of pmode usage
	cmp	dx, es:[2]		;compare to total available low memory
	mov	ax, 0			;if error then "not enough low memory"
	jae	short start1		;jump if error

;shrink (low) memory allocated by DOS down to what's actually needed
	mov	ax, es			;amount of memory needed =
	sub	bx, ax			; pmode base - PSP + pmode usage
	add	bx, cx
	mov	ah, 4ah			;DOS function: modify memory allocation
	int	21h
	mov	ax, 0			;if error then "not enough low memory"
	jc	start1			;jump if error

	mov	es, cx			;base of pmode usage
	call	_pm_init		;enter protected mode
	jc	short start1		;jump if error

; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
;Protected mode
;
;Set up 32-bit, 4GB code descriptor
	mov	cx, 1			;allocate 1 descriptor
	xor	ax, ax			;DPMI function 0000: Allocate descriptor
	int	31h

	mov	bx, ax			;bx:= descriptor allocated
	mov	dx, 0FFFFh		;set size to 4G
	mov	cx, dx
	mov	ax, 8			;DPMI: set segment limit
	int	31h

	mov	edx, cseg		;base address = 32-bit code segment
	shl	edx, 4
	shld	ecx, edx, 16
	mov	ax, 7			;DPMI: set segment base address
	int	31h

	mov	ax, cs			;set descriptor type to 32-bit code
	lar	cx, ax			; at the current CPL
	mov	cl, ch
	mov	ch, 0C0h
	mov	ax, 9			;DPMI: set descriptor access rights
	int	31h

	mov	bp, bx			;save selector in bp

;Set up 32-bit, 4GB data descriptor
	mov	bx, ds			;BX = current data selector
	mov	dx, 0FFFFh		;set size to 4G
	mov	cx, dx
	mov	ax, 8			;DPMI: set segment limit
	int	31h

	mov	edx, dseg		;base address = 32-bit data segment
	shl	edx, 4
	shld	ecx, edx, 16
	mov	ax, 7			;DPMI: set segment base address
	int	31h

;Set up other descriptors
	cli				;don't interrupt while changing SS:ESP
	mov	ds, bx			;set all segment registers to data
	mov	es, bx			; selector
	mov	gs, bx
	mov	ss, bx

	mov	eax, exe_stack		;adjust ESP for new base of stack
	mov	ebx, dseg
	sub	eax, ebx
	shl	eax, 4
	add	esp, eax
	sti

;Set up 32-bit, 4GB code descriptor for fs
	mov	cx, 1			;allocate 1 descriptor
	xor	ax, ax			;DPMI function 0000: allocate descriptor
	int	31h

	mov	bx, ax			;bx:= descriptor allocated
	mov	dx, 0FFFFh		;set size to 4G
	mov	cx, dx
	mov	ax, 8			;DPMI: set segment limit
	int	31h

	xor	edx, edx		;base address = 0
	xor	ecx, ecx
	mov	ax, 07h			;DPMI: set segment base address
	int	31h
	mov	fs, bx

	push	ebp			;push target CS:EIP in 32-bit code
	db	66h, 68h
	dd	offset start32

	db	66h			;32-bit RETF to 32-bit code
	retf

;-------------------------------------------------------------------------------
;Control-c and control-break interrupt handler set up by TrapC intrinsic (75).
; (When this handler is called, the processor is in 16-bit real mode.)
;
ctrbrk:	push	ax	 		;save registers
	push	ds

	mov	ax, dseg
	mov	ds, ax
	assume	ds:dseg
	or	dword ptr ctcflg, -1	;indicate ctrl-c or ctrl-break is struck

	pop	ds			;restore registers
	pop	ax
	iret
	assume	ds:PMODE_TEXT

PMODE_TEXT	ends

;-------------------------------------------------------------------------------

cseg	segment para public use32 'code'
	assume	cs:cseg, ds:dseg

dseg	segment	para public use32 'data'
;debug
;msg0	db	'Hello World from 32-bit Protected Mode...', 13, 10
;	db	'Press any key to continue...', 13, 10, 36

		align 4
regs_edi	label	dword
regs_di	dw	0, 0
regs_esi	label	dword
regs_si	dw	0, 0
regs_ebp	label	dword
regs_bp	dw	0, 0
		dd	0
regs_ebx	label	dword
regs_bx	label	word
regs_bl	db	0
regs_bh	db	0, 0, 0
regs_edx	label	dword
regs_dx	label	word
regs_dl	db	0
regs_dh	db	0, 0, 0
regs_ecx	label	dword
regs_cx	label	word
regs_cl	db	0
regs_ch	db	0, 0, 0
regs_eax	label	dword
regs_ax	label	word
regs_al	db	0
regs_ah	db	0, 0, 0
regs_flags	dw	0
regs_es	dw	0
regs_ds	dw	0
regs_fs	dw	0
regs_gs	dw	0
regs_ip	dw	0
regs_cs	dw	0
regs_sp	dw	0
regs_ss	dw	0

dataselector	dw	0

stkptr	dd	0			;stack pointer at XPL code entry
entstk	dw	0			;sp at program entry, intrinsics 66 & 67

pspseg	dw	0			;location of current PSP
dvzvecs	dw	0			;original div-by-0 vector selector
dvzveco	dd	0			; offset
nowdev	dw	0			;current I/O device number
traps	dw	0			;runtime error trap flags
erraddr	dd	0			;address of runtime error
errnum	db	0			;error code number

	align	4
memhand	dd	0			;handle used to release high heap memory
heaphi	dd	0			;pointer to top of heap (highest addr)
hp	dd	0			;heap pointer (edi is used if flagopt=1)
remain	dd	0			;remainder from last integer divide
rerunf	dd	0			;rerun flag, set by restart, intrinsic 6

;Display vector. These point to the base of variables in the heap, one for each
; level of static nesting. In practice, base3 and above are rarely used.
base0	dd	0			;base of global variables
base1	dd	0			;base of local variables at level 1
base2	dd	0
base3	dd	0
base4	dd	0
base5	dd	0
base6	dd	0
base7	dd	0

;Define some ASCII characters
bel	equ	07h			;bell
cr	equ	0Dh			;carriage return
lf	equ	0Ah			;line feed
eof	equ	1Ah			;end of file
tsp	equ	' '+80h			;define terminating space character
dseg	ends

;===============================================================================

start32:

;debug msg:
;	pusha
;	mov	regs_ah, 09h		;function code 09h: put string
;	mov	regs_ds, dseg		;set DS:DX for DOS string put
;
;	mov	eax, offset msg0
;	mov	regs_dx, ax
;
;	mov	edi, offset regs_edi	;point to regs
;	xor	ecx, ecx		;no parameters on stack
;	mov	regs_ss, cx		;SS:SP = 0, PMODE will provide stack
;	mov	regs_sp, cx
;	mov	bx, 21h			;call interrupt 21h
;	mov	ax, 300h		;int 31h function 0300h
;	int	31h			;do the call to real mode
;
;	mov	ah, 0			;wait for keypress
;	int	16h
;	popa
;end debug


;	xor	eax, eax		;set up pointer to top of heap
;	mov	ax, es:0002h		;get segment addr of top of prog's mem
;	mov	dx, ds			;subtract data segment
;	sub	ax, dx
;	shl	eax, 4			;convert segment addr to absolute addr
;	mov	heaphi, eax
;Should it be verified that the buffers don't exceed top of prog's memory?

	xor	eax, eax		;no errors yet
	mov	erraddr, eax
	mov	errnum, al

	mov	traps, 0FFFFh		;trap all errors

;Allocate available memory and put the heap there
	sub	esp, 2Ch		;allocate buffer space on stack
	pushd	100000h			;set default size to 1 MB
	mov	edi, esp		;point to (first dword in) buffer
	mov	ax, 0500h		;DPMI: get free memory information
	int	31h			;(es=ds)

	mov	ecx, [edi+8]		;get maximum locked page allocation
	shl	ecx, 12			;assume page has 4K granularity
	jnc	start08			;ecx = FFFFF000h unless Windows DPMI
	 mov	ecx, [edi]		;get largest available free block
start08:shld	ebx, ecx, 16		;load bx with high 16 bits in ecx
	add	esp, 30h		;discard buffer space
	push	ecx			;save amount to allocate

	mov	ax, 0501h		;DPMI: allocate extended memory block
	int	31h
	jnc	short start10
	 mov	errnum, 2		;memory error
	 pop	ecx			;balance stack
	 jmp	start80
start10:mov	word ptr hp, cx		;hp:= bx:cx
	mov	word ptr hp+2, bx

	mov	word ptr memhand, di	;save handle (for releasing) = si:di
	mov	word ptr memhand+2, si

;hp is an absolute physical address in memory. Since all of XPL's usage of hp
; is relative to dseg (=ds), dseg must be subtracted here (because it will be
; automatically added to every reference to hp later on).
	mov	eax, hp			;adjust for ds base
	mov	edx, dseg		;hp:= hp - dseg*16
	shl	edx, 4
	sub	eax, edx
	mov	hp, eax

	mov	base0, eax		;set level 0 display pointer
	pop	ecx			;restore amount allocated
	add	eax, ecx
	dec	eax
	mov	heaphi, eax

;	mov	eax, offset heaplo	;set heap pointer to start of heap
;	mov	hp, eax
;	mov	base0, eax		;set level 0 display pointer
;should base0 be set to point to the low heap?

start20:call	setdvz			;set up divide-by-zero vector
	call	setint			;initialize intrinsics
	call	timerop			;set up system timer 0
	if	mathcop
	 call	tst387			;initialize math coprocessor
	endif


 if 0
;Warning: If this is enabled then the Restart intrinsic may not work as expected.
;debug
;zero memory
	xor	eax, eax
	mov	ebx, hp
	mov	ecx, 4000000		;number of bytes to zero
zzz10:	mov	[ebx], al
	inc	ebx
	dec	ecx
	jne	short zzz10
;end debug

;zero the global heap area too
	mov	ecx, 1600*8
	mov	ebx, offset heaplo
zzz20:	mov	[ebx], al
	inc	ebx
	dec	ecx
	jne	short zzz20
 endif


	if	flagopt
	 mov	edi, hp			;edi is the heap pointer (hp)
	 mov	esi, offset heaplo
	endif

	call	startxpl		;run the XPL program

	cmp	errnum, 0		;did a runtime error occur?
	je	short start90		;skip if not
start80:
	mov	ah, 0Fh			;make sure video mode is for text
	int	10h			;so error message can be seen
	cmp	al, 03			;if GetVid # 3 then...
	je	start85
	 mov	ax, 0003h		;set normal text mode
	 int	10h			;SetVid 3
start85:
	lea	si, em0			;display runtime error message
	call	text			;e.g: "RUN-TIME ERROR 5: BAD INTRINSIC"
	movzx	eax, errnum		;display the number
	mov	byte ptr heaplo, al	;return error number to DOS
	mov	nowdev, 0
	call	intout
	mov	esi, errmsg[eax*4]-4	;index to appropriate message
	call	text

;	lea	esi, emat		; AT: $1234ABCD
;	call	text
;	mov	eax, erraddr
;	call	hex8out

;	call	crlf		;debug
;	mov	eax, cseg	;debug
;	call	hex8out		;debug


	call	crlf
start90:
	call	resdvz			;restore divide by 0 vector
	call	rstbrk			;restore break vector if TRAPC(true)
	call	timercl			;restore system timer 0

;Release extended memory space
	mov	di, word ptr memhand	;get handle to memory block
	mov	si, word ptr memhand+2
	mov	ax, 0502h		;DPMI: free memory block
	int	31h

	mov	ah, 4Ch			;exit back to DOS
	mov	al, byte ptr heaplo	;return low byte of global zero
	int	21h

;-------------------------------------------------------------------------------
;Set up and start the XPL program.
;
startxpl:
	mov	stkptr, esp		;save stack pointer
	jmp	progrm			;go to XPL code

;===============================================================================
;Set divide-by-zero interrupt vector to point to our routine (dvzhan).
;
setdvz:	pusha
	mov	bl, 0			;vector 0
	mov	ax, 0204h		;DPMI: get protected mode interrupt vect
	int	31h
	mov	dvzvecs, cx		;save original vector selector
	mov	dvzveco, edx		;save original vector offset

	mov	edx, offset dvzhan	;get address of our routine
	mov	cx, cs			;code selector
	mov	bl, 0			;vector 0
	mov	ax, 0205h		;DPMI: set protected mode interrupt vect
	int	31h
	popa
	ret

;-------------------------------------------------------------------------------
;Restore original divide-by-zero interrupt vector.
;
resdvz:	pusha
	mov	edx, dvzveco		;get original vector offset
	mov	cx, dvzvecs		;get original vector selector
	mov	bl, 0			;vector 0
	mov	ax, 0205h		;DPMI: set protected mode interrupt vect
	int	31h
	popa
	ret

;-------------------------------------------------------------------------------
;Exception handler for divide-by-zero error.
;
dvzhan:	mov	eax, [esp]		;get address of error
	mov	erraddr, eax
	mov	al, 1			;flag divide-by-zero error
	call	error			;handle error

;WARNING: This must match the code generated by the compiler: IDIV EBX
	mov	ebx, 1			;set divisor to 1 for repeated IDIV
	mov	eax, 07FFFFFFFh		;force big number quotient
	xor	edx, edx		; and zero remainder
	iretd

;-------------------------------------------------------------------------------
;Runtime error handler
; al contains the error number
;
error:	push	ecx			;save registers
	push	edx

	mov	errnum, al		;save the error number
	mov	dl, al			;set loop counter, dl, with error no.
	mov	cx, traps		;get trap flags

err10:	shr	cx, 1			;shift trap bits into carry
	dec	dl			;count error codes
	jne	short err10		;loop until zero (Larry's way)
	jnc	short err90		;if bit is clear then no trap--return
	mov	esp, stkptr		;else trap and return from XPL program
	ret				;return from anywhere

err90:	pop	edx
	pop	ecx			;restore registers
	ret				;near return to intrinsic

;-------------------------------------------------------------------------------
;Display the text string pointed to by si on the console.
; ax and si are destroyed.
;
text:	cld				;force increment mode
	jmp	short txtlp1		;enter loop

txtlp2:	call	conout			;out it goes
txtlp1:	mov	al, [si]
	inc	si
	test	al, al			;last char?
	jns	short txtlp2		;loop if not
	and	al, 7Fh			;mask to 7 bits
	jmp	conout			;(pjmp) output last char

;-------------------------------------------------------------------------------
;Output a carriage return and a line feed to the console.
; ax is destroyed.
;
crlf:	mov	al, cr
	call	conout
	mov	al, lf
	jmp	conout			;(pjmp)

;-------------------------------------------------------------------------------

dseg	segment	para public use32 'data'
;Error messages:
em0	db	bel, cr, lf, 'RUN-TIME ERROR', tsp

errmsg	dd	em1			;table of message pointers
	dd	em2
	dd	em3
	dd	em4
	dd	em5
	dd	em6
	dd	em7
	dd	em8
	dd	em9
	dd	em10
	dd	em11
	dd	em12
	dd	em13
	dd	em14
	dd	em15

em1	db	': DIV BY 0', tsp
em2	db	': OUT OF MEMORY', tsp
em3	db	': I/O', tsp
em4	db	': BAD OPCODE', tsp
em5	db	': BAD INTRINSIC', tsp
em6	db	': DIV BY 0.0', tsp
em7	db	': OVERFLOW', tsp
em8	db	': UNDERFLOW', tsp
em9	db	': FIX OVERFLOW', tsp
em10	db	': SQRT < 0.0', tsp
em11	db	': LOG <= 0.0', tsp
em12	db	': EXP OVERFLOW', tsp
em13	db	': OVERFLOW', tsp
em14	db	': ATAN2(0.0,0.0)', tsp
em15	db	': VIDEO MODE', tsp

emat	db	'AT:', tsp
dseg	ends

;===============================================================================
;Routine to reserve arrays with any number of dimensions
; This also sets the pointer to the heap space that is reserved
; For example: Mkarray( (3, 5, 7, 11), 4, 2);
; The stack contains (from bottom to top; the top of stack is actually in eax):
;  The number of elements in each dimension (3, 5, 7, 11)
;  The number of dimensions (4)
;  The number of bytes in each element of the array (1=char, 4=int, 8=real) =eax
;  (Only the last dimension of a char array has a single byte in it; the other
;  dimensions in a multidimensional char array all have 4-byte pointers.)
;
dseg	segment para public use32 'data'
levmax	dd	0		;maximum level of recursion (last dimension)
retadr	dd	0		;return address
esisave	dd	0		;save location for critical registers
edisave	dd	0		; (they can't be saved on the stack)
edibase	dd	0		;address of (1st dimension of) heap space
				; (for global array in a split heap)
dseg	ends

mkarray:pop	retadr		;save return address

	if	flagopt
	push	eax		;stack the number of bytes in each element
	mov	ebx, edi	;heap pointer points to pointer to array
	add	eax, 3		;allocate heap space for pointer to array
				;(because of split heap this wastes 4, or 8, 
				; bytes of heap space for global arrays)
	and	al, 0FCh	;eax:= 4, or 8 if real (rounds 1, for character
	add	edi, eax	; arrays up to 4)
	else
	pop	ebx		;get address of pointer to array
	endif

	mov	esisave, esi	;save critical registers
	mov	edisave, edi
	mov	edibase, edi

	pop	edi		;get number of bytes in each element
	pop	eax		;get number of dimensions
	dec	eax		;convert to maximum index off of ebp (levmax)
	shl	eax, 2		;qudruple for dword entries on the stack
	mov	ebp, eax
	add	ebp, esp	;point to number of elements in 1st dimension
	neg	eax
	mov	levmax, eax

	xor	esi, esi	;set index to point to size of first dimension
	call	genary		;reserve first dimension of array and store its
				; base address into location pointed to by ebx
	add	ebp, 4		;clean up stack--discard arguments
	mov	esp, ebp

	mov	esi, esisave	;restore critical registers
	mov	edi, edisave
	add	edi, 3		;round heap pointer up to next dword boundary
	and	edi, 0FFFFFFFCh	; (char arrays can misalign it)
	mov	eax, edibase	;return base address of array space
	push	retadr		;restore return address
	ret


;Recursively called subroutine for setting up a multi-dimensional array.
;Reserve an array and store its base address into the location pointed to
; by the ebx register.
;Register usage (* indicates input values):
;	eax - scratch
;  *	ebx - address of pointer to (next dimension of) array
;	ecx - loop counter and index
;	edx - base address of current dimension of array
;  *	edi - number of bytes in an element (1, 4 or 8)
;  *	esi - index to size of each dimension; also = level of recursion (* -4)
;  *	ebp - points to first argument on the stack (size of first dimension)

genary:	mov	eax, [ebp+esi]	;get number of elements for current dimension
	mov	ecx, eax	;save for possible use by loop counter below

;Reserve space on heap
	cmp	esi, levmax	;is this the last dimension?
	jg	ga02		;skip if not
	cmp	edi, 1		;is this a char array?
	je	ga07		;jump if so (last dimension is a single byte)
ga02:	cmp	edi, 8		;is this a real array?
	jne	ga05		;skip if not
	mov	dword ptr [ebx+4], 3FF00000h ;address goes into low end of 1.0
	add	eax, eax	;multiply elements by 8 bytes per real number
	jc	ga10		;check for memory overflow: error if > $FFFFFFFF
ga05:	add	eax, eax	;multiply elements by 4 bytes per integer
	jc	ga10		;check for memory overflow: error if > $FFFFFFFF
	add	eax, eax	;(this was more reasonable when 64K was limit)
	jc	ga10
	if	flagopt
ga07:	mov	edx, edisave	;edx = base of reserved heap space
	add	edisave, eax	;add number of bytes to reserve to heap pointer
	jc	ga10		;check for memory overflow: error if > $FFFFFFFF
	mov	eax, heaphi	;check for memory overflow
	cmp	edisave, eax
	else
ga07:	mov	edx, hp		;edx = base of reserved heap space
	add	hp, eax		;add number of bytes to reserve to heap pointer
	jc	ga10		;check for memory overflow: error if > $FFFFFFFF
	mov	eax, heaphi	;check for memory overflow if >= heaphi
	cmp	hp, eax
	endif
	jb	ga20		;skip if ok
ga10:	mov	al, 2		;error 2: memory overflow
	call	error
ga20:	mov	[ebx], edx	;store base address of reserved space into ptr

	cmp	esi, levmax	;is this the last dimension?
	jle	ga90		;jump if so--don't recurse any lower

	xor	eax, eax	;initialize index to first element
ga30:	cmp	eax, ecx	;is index beyond limit?
	jae	ga90		;jump if so--exit loop

	sub	esi, 4		;advance index to next dimension size on stack
	mov	ebx, eax	;get index for element in current dimension
	cmp	edi, 8		;is this a real array?
	jne	ga40		;skip if not
	add	ebx, ebx	;reserve space for 8 bytes
ga40:	shl	ebx, 2		;multiply by 4 to index by dwords
	add	ebx, edx	;add base address of reserved space, current dim

	push	eax		;save some things (index, limit, base addr)
	push	ecx
	push	edx
	call	genary		;reserve next dimension of array and store its
	pop	edx		; base address into location pointed to by ebx
	pop	ecx		;restore things
	pop	eax
	add	esi, 4		;restore index to previous dimension size on stk

	inc	eax		;next element
	jmp	short ga30	;loop for each element in this dimension

ga90:	ret

;===============================================================================
;                               INTRINSICS
;===============================================================================
;Start-up initialization for intrinsics module.
;
setint:	call	ranint			;reset random number generator seed
	call	resfullwnd		;reset device 6 window for full screen
	call	resdib			;reset disk buffer pointers (input)
	jmp	resdob			;(pjmp) (and output)

;-------------------------------------------------------------------------------
;Intrinsic to return the absolute value of top of stack.
; VAL:= ABS(VAL);
;
intr0:	test	eax, eax
	jns	short abs10
	neg	eax
abs10:	ret

;-------------------------------------------------------------------------------
;Intrinsic to generate a random number between 0 and (top of stack)-1.
; If tos = 0 then this initializes a seed for a repeatable sequence.
; If tos < 0 then this randomizes the seed and returns RAN(-tos).
; VAL:= RAN(10);	\e.g: random number from 0 through 9
; Algorithm is from Knuth's "Art of Computer Programming" Vol 2. 3rd ed. p. 185.
;
dseg	segment	para public use32 'data'
seed	dd	33049			;standard seed (prime number)
dseg	ends

intr1:	test	eax, eax		;range is in eax
	jne	short ran01		;skip if not zero
	 mov	seed, 33049		;else reinitialize with standard seed
	 ret
ran01:
	jns	short ran05
	 neg	eax			;make range positive
	 call	ranint			;randomize seed
ran05:
	push	ecx			;save register
	push	eax			;save range

; Seed:= A*rem(Seed/Q) - R*(Seed/Q);
;  M = 7fffffffh			;Mersenne prime
;  A = 48271				;"does well in spectral test"
;  Q = 44488 = M/A
;  R =  3399 = rem(M/A);		

	mov	eax, seed		;get Seed
	mov	ecx, 44488		;/Q
	cdq
	idiv	ecx			;eax(q),edx(r) := edx:eax/ecx
	imul	ecx, eax, 3399		;*R
	imul	eax, edx, 48271		;*A
	sub	eax, ecx

	jge	ran10			;if Seed < 0 then Seed:= Seed + M;
	 add	eax, 07fffffffh		;+M
ran10:	mov	seed, eax

	pop	ecx			;get range
	cdq				;return rem(Seed/range);
	idiv	ecx			;eax(q),edx(r) := edx:eax/ecx
	mov	eax, edx
	pop	ecx			;restore register
	ret

;-------------------------------------------------------------------------------
;Initialize random number generator.
; Uses system interrupt counter.
;
ranloc	equ	46ch			;location of timer counter

ranint:	push	eax			;save register

	mov	eax, fs:ranloc		;get random seed
	or	eax, 4			;make sure it's not zero
	mov	seed, eax		;set seed

	pop	eax			;restore register
	ret

;-------------------------------------------------------------------------------
;Intrinsic to set the seed for the random number generator, for a repeatable
; sequence. Setting the seed to 0 will always return a 0 random number. (Fug?)
;
intr79:	mov	seed, eax
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return the remainder of the most recent division. The argument
; is an expression whose result is thrown away. This expression can contain
; a division, or simply be a zero to get the result of an earlier division.
; VAL:= REM(VAL/10);
;
intr2:	mov	eax, remain		;get last remainder
	ret

;-------------------------------------------------------------------------------
;Intrinsic to reserve bytes on the heap and return the address of the
; reserved space. This is the trick by which XPL handles dynamic storage
; for arrays. Since the space is reserved in the heap allocation of a
; procedure, the array will disappear when the procedure is exited.
; ARRAY:= RESERVE(10 *INTSIZE);
;
	if	flagopt
intr3:	 mov	edx, edi		;save heap pointer
	 add	edi, eax		;add in number of bytes to reserve
	 jc	short res80		;error if heap wraps around
	 add	edi, 3			;round up to next dword boundary
	 jc	short res80		;error if heap wraps around
	 and	edi, 0FFFFFFFCh
	 mov	eax, heaphi		;test for overlfow
	 cmp	edi, eax
	else
intr3:	 mov	edx, hp			;save heap pointer
	 add	hp, eax			;add in number of bytes to reserve
	 jc	short res80		;error if heap wraps around
	 mov	eax, heaphi		;test for overlfow
	 cmp	hp, eax
	endif
	jb	short res90		;skip if ok
res80:	mov	al, 2			;runtime error # 2: out of memory
	call	error
res90:	mov	eax, edx		;return address of reserved space
	ret

;-------------------------------------------------------------------------------
;Intrinsic to swap the two low bytes of top of stack
; VAL:= SWAP(VAL);
;
intr4:	xchg	al, ah
	ret

;-------------------------------------------------------------------------------
;Intrinsic to extend the sign of the low byte into the high bytes
; VAL:= EXTEND(VAL);
;
intr5:	movsx	eax, al
	ret

;-------------------------------------------------------------------------------
;Intrinsic to restart the current program
; RESTART;
;
intr6:	or	rerunf, -1		;set rerun flag
	mov	eax, base0		;reset heap pointer to start of heap
	mov	hp, eax
	jmp	start20

;-------------------------------------------------------------------------------
;Intrinsic to input a byte and return it in eax
; CH:= CHIN(DEV);
;
intr7:	mov	nowdev, ax		;set device number
	call	chin
	movzx	eax, al			;zero high bytes
	ret

;-------------------------------------------------------------------------------
;Intrinsic to output the byte in al.
; CHOUT(DEV, CH);
;
intr8:	mov	edx, [esp]+4		;get device number after return address
	mov	nowdev, dx
	call	chout
	ret	4			;drop arg

;-------------------------------------------------------------------------------
;Intrinsic to output a carriage return and a line feed
; CRLF(DEV);
;
intr9:	mov	nowdev, ax		;set device number
	mov	al, cr
	call	chout
	mov	al, lf
	jmp	chout			;(pjmp)

;-------------------------------------------------------------------------------
;Intrinsic to input ASCII digits and convert them to a signed, decimal, 
; 32-bit value which is returned in eax.
; VAL:= INTIN(DEV);
;	al = character input
;	ebx= working register (contains number to be converted)
;	cl = flag: a numeric character has been entered
;	ch = flag: a minus sign was entered, i.e. the number is negative
;
intr10:	push	ebx
	push	ecx			;save registers

	mov	nowdev, ax		;set device number
ii00:	xor	ebx, ebx		;NUM:=0;
	xor	ecx, ecx		;NUMFLG:=false;   SIGN:=false

	call	chin			;get character
	cmp	al, '-'			;if D0 = ^- then SIGN:=true
	jne	short ii30
	mov	ch, 0FFh
					;loop begin
ii20:	call	chin			;  get digit in al
	cmp	al, '_'			;ignore underlines
	je	ii20
ii30:	cmp	al, eof			;  if eof then exit
	je	short ii55
	sub	al, '0'			;  if al<'0' ! al>'9' then quit
	jb	short ii50
	cmp	al, 9
	ja	short ii50
	mov	cl, 0FFh		;  NUMFLG:=true

	lea	ebx, [ebx+ebx*4]	;  NUM:= NUM*10 + (D0-^0)
	add	ebx, ebx
	movzx	eax, al
	add	ebx, eax
	jmp	short ii20		;  end

ii50:	test	cl, cl			;if NUMFLG then quit
	je	short ii00
ii55:	test	ch, ch			;if SIGN then NUM:= -NUM
	je	short ii60
	neg	ebx
ii60:	mov	eax, ebx		;return NUM

	pop	ecx
	pop	ebx			;restore registers
	ret

;-------------------------------------------------------------------------------
;Intrinsic to output the signed integer on top of stack (eax).
; INTOUT(DEV, VAL);
;
intr11:	mov	edx, [esp]+4		;get device number after return address
	mov	nowdev, dx
	call	intout			;output number in eax
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Convert the signed, 32-bit value in eax to decimal ASCII and output the
; characters to "nowdev".
;	eax= number to convert and output
;	bx = index to power-of-ten table
;	cl = flag used to suppress leading zeros (suppress if false)
;	edx= remainder of divide
;
intout:	push	eax
	push	ebx
	push	ecx
	push	edx
	test	eax, eax		;jump if number is positive
	jns	short into10
	neg	eax			;otherwise make it positive
	mov	dl, al			;save number temporarily
	mov	al, '-'			;output the minus sign
	call	chout
	mov	al, dl			;restore number
into10:					;initialize:
	mov	cl, 0			; flag used to suppress leading zeros
	mov	ebx, 4*8		; index to power-of-ten table

into20:	xor	edx, edx		;sign extend positive number in eax
	div	dword ptr tentbl[ebx]	;divide by a power of ten
	test	al, al			;jump if digit is not zero
	jne	short into40
	test	cl, cl			;are we suppressing leading zeros?
	je	short into50		;jump if we are (i.e. flag = false)
into40:	mov	cl, 0FFh		;turn leading zero suppression off
	add	al, '0'			;convert digit to ASCII
	call	chout			;output it
into50:
	mov	eax, edx		;get remainder from last divide
	sub	ebx, 4			;next power of ten in table
	jns	short into20		;repeat for powers 1, 000, 000, 000 down
					; thru 10;
	add	al, '0'			;output the one's digit regardless of
	call	chout			; the leading zero suppression flag
	pop	edx
	pop	ecx
	pop	ebx
	pop	eax
	ret

dseg	segment	para public use32 'data'
	align	4
;Power-of-ten table:
tentbl	dd	10
	dd	100
	dd	1000			;1k
	dd	10000
	dd	100000
	dd	1000000			;1m
	dd	10000000
	dd	100000000
	dd	1000000000		;1g
dseg	ends

;-------------------------------------------------------------------------------
;Intrinsic to output a text string. The starting address is top of stack
; (eax). The string is terminated by a character with bit 7 set.
; TEXT(DEV, "HELLO WORLD");
;
intr12:	mov	edx, eax		;get address of string
	mov	eax, [esp]+4		;get device number
	mov	nowdev, ax
	jmp	short txtl20		;enter loop

txtl10:	call	chout			;output character in al
	inc	edx			;point to next character
txtl20:	mov	al, [edx]		;get character at pointer
	test	al, al			;is the msb set?
	jns	short txtl10		;jump if not -- loop
	and	al, 7Fh			;clear msb
	call	chout			;output character
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to output a null-terminated text string. The starting address is in 
; top of stack (eax). The string is terminated with a binary 0.
; TEXT(DEV, "HELLO WORLD");
;
intr12a:mov	edx, eax		;get address of string
	mov	eax, [esp]+4		;get device number
	mov	nowdev, ax
	jmp	short txtl20a		;enter loop

txtl10a:call	chout			;output character in al
	inc	edx			;point to next character
txtl20a:mov	al, [edx]		;get character at pointer
	test	al, al			;is it a 0?
	jne	short txtl10a		;jump if not -- loop
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to open an input device.
; OPENI(DEV);
;
intr13:	mov	nowdev, ax		;get device number
	jmp	openi			;(pjmp) go do it

;-------------------------------------------------------------------------------
;Intrinsic to open an output device.
; OPENO(DEV);
;
intr14:	mov	nowdev, ax		;get device number
	jmp	openo			;(pjmp) go do it

;-------------------------------------------------------------------------------
;Intrinsic to close an output device.
; CLOSE(DEV);
;
intr15:	mov	nowdev, ax		;get device number
	jmp	close			;(pjmp) go do it

;-------------------------------------------------------------------------------
;Intrinsic to abort the program.
; ABORT;
;
intr16:	mov	esp, stkptr
	ret

;-------------------------------------------------------------------------------
;Intrinsic to set the error trap flags.
; TRAP($FFFF);
;
intr17:	mov	traps, ax
	ret

;-------------------------------------------------------------------------------
;Intrinsic to tell a user how many bytes of heap space he has left. Of
; course he may not reserve all of it, since he must leave a working
; heap for subsequent procedure calls etc. Since only the user knows
; how much this might be, it is left to him to decide how much he
; actually has free to play with.
; VAL:= FREE;
;
intr18:	mov	eax, heaphi
	if	flagopt
	 sub	eax, edi
	else
	 sub	eax, hp
	endif
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return the rerun flag.
; VAL:= RERUN;
;
intr19:	mov	eax, rerunf
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return the current value of the heap pointer. This intrinsic
; is used for saving the heap pointer for later restoration with SETHP.
; The user had better have a good idea of the funtioning of XPL before
; dinging with the heap pointer or he will surely bomb himself!
; VAL:= GETHP;
;
intr20:
	if	flagopt
	 mov	eax, edi
	else
	 mov	eax, hp
	endif
	ret

;-------------------------------------------------------------------------------
;Intrinsic to set the heap pointer--a very dangerous thing to do! See
; GETHP's comments.
; SETHP(VAL);
;
intr21:
	if	flagopt
	 mov	edi, eax
	else
	 mov	hp, eax
	endif
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return the runtime error number and then clear it.
; VAL:= GETERR;
;
intr22:	movzx	eax, errnum
	mov	errnum, 0
	ret

;-------------------------------------------------------------------------------
;Intrinsic to set the video cursor to a specified position.
; CURSOR(X, Y);
;
intr23:	push	ebx
	mov	ebx, [esp]+8		;get X-position into bl, Y is in al
	call	mcurse
	pop	ebx
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to set the rerun flag directly.
; SETRUN;
;
intr25:	mov	rerunf, eax
	ret

;-------------------------------------------------------------------------------
;Intrinsic to input ASCII digits and convert them to a 32-bit hex value
; which is returned in eax.
; VAL:= HEXIN(DEV);
;	al = character input
;	ebx= accumulated value
;	cl = digit counter
;
intr26:	push	ebx
	push	ecx			;save registers
	mov	nowdev, ax		;get device number

	xor	ebx, ebx		;clear result register
	mov	cl, 8			;initialize digit counter
hexi00:	call	chin			;get character
	cmp	al, '_'			;ignore underlines
	je	hexi00
	cmp	al, eof			;if it's an eof then exit
	je	short hexi90
	cmp	al, '0'			;is character in range 0 thru 9?
	jb	short hexi40		;jump if not
	cmp	al, '9'
	ja	short hexi20		;(might be A-F)
	sub	al, '0'			;convert ASCII to binary value
	jmp	short hexi30		;go combine with other digits
hexi20:
	and	al, 0DFh		;force to uppercase
	cmp	al, 'A'			;is character in range A thru F?
	jb	short hexi40		;jump if not
	cmp	al, 'F'
	ja	short hexi40
	sub	al, 'A'-10		;convert ASCII to binary value
hexi30:
	shl	ebx, 4			;multiply current value by 16
	add	bl, al			;add new digit
	dec	cl
	jne	short hexi00		;exit if we have 8 digits
hexi40:
	cmp	cl, 8			;did we find a valid hex digit?
	je	short hexi00		;jump if not -- keep trying
hexi90:	mov	eax, ebx		;return the hex value in eax

	pop	ecx
	pop	ebx			;restore registers
	ret

;-------------------------------------------------------------------------------
;Intrinsic to output the top of stack (eax) in hex ASCII.
; HEXOUT(DEV, VAL);
;
intr27:	mov	edx, [esp]+4		;get device number
	mov	nowdev, dx
	call	hex8out			;output eax in hex ASCII
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Output eax in ASCII hex (8 digits).
;
hex8out:ror	eax, 16			;get high word
	call	hex4out			;output it
	ror	eax, 16			;(pfall) get low word back

;-------------------------------------------------------------------------------
;Output ax in ASCII hex (4 digits).
;
hex4out:xchg	ah, al			;get high byte into al
	call	hex2out			;output it
	xchg	al, ah			;(pfall) get low byte

;-------------------------------------------------------------------------------
;Output al in ASCII hex (2 digits).
;
hex2out:ror	al, 4			;move high nybble down (save low nybble)
	call	hex1out			;output it
	ror	al, 4			;(pfall) get low nybble

;-------------------------------------------------------------------------------
;Output al in ASCII hex (1 digit).
;
hex1out:push	eax			;save al

	and	al, 0Fh			;work with low nybble only
	cmp	al, 10
	jb	short h1o10
	add	al, 7
h1o10:	add	al, '0'			;convert to ASCII
	call	chout			;output it

	pop	eax			;restore al
	ret

;-------------------------------------------------------------------------------
;Intrinsic to quickly copy a block of memory. It's smart enough to handle the
; case where the TO block overlaps the FROM block.
; BLIT(FROM_SEG, FROM_OFFSET, TO_SEG, TO_OFFSET, SIZE);
; offset:16        12           8       4         eax
;
intr36:	push	esi
	push	edi			;save regs (use +8 to access arguments)

	mov	edx, eax		;get SIZE, the number of bytes to move
	mov	edi, [esp]+8+8		;get TO_SEG
	shl	edi, 4			;convert to linear address
	add	edi, [esp]+4+8		; add TO_OFFSET
	mov	esi, [esp]+16+8		;get FROM_SEG
	shl	esi, 4			;convert to linear address
	add	esi, [esp]+12+8		; add FROM_OFFSET

	cmp	esi, edi		;compare TO and FROM addresses
	je	blit90			;bail out if no need to move anything
	jb	blit50			;move possibly overlapping blocks

;FROM block is above the TO block, so start at the beginning of the blocks
;Move 4 bytes (dword) at a time for speed, but first deal with partial dword
	shr	edx, 1			;convert byte count to word count
	jnc	blit10			;jump if # bytes to move is even
	 mov	al, fs:[esi]		;move byte from esi to edi
	 inc	esi
	 mov	fs:[edi], al
	 inc	edi
;The fs override must be used rather than the default ds (after subtracting
; dseg) because Windows XP limits access to memory (it's protected), and it
; doesn't allow a negative offset from ds, which occurs when blitting the PSP
blit10:
	shr	edx, 1			;convert word count to dword count
	jnc	blit15			;jump if # words to move is even
	 mov	ax, fs:[esi]		;move word from esi to edi
	 inc	esi
	 inc	esi
	 mov	fs:[edi], ax
	 inc	edi
	 inc	edi
blit15:
	test	edx, edx		;all done?
	je	blit90

blit20:	mov	eax, fs:[esi]		;move dword from esi to edi
	add	esi, 4
	mov	fs:[edi], eax
	add	edi, 4
	dec	edx			;loop for all dwords
	jne	blit20
	jmp	blit90

;FROM block is below the TO block, so start at the end of the blocks
blit50:
	add	esi, edx		;point to last byte +1 in blocks to copy
	add	edi, edx

	shr	edx, 1			;convert byte count to word count
	jnc	blit55			;jump if # bytes to move is even
	 dec	esi			;descend from end of block
	 mov	al, fs:[esi]		;move byte from esi to edi
	 dec	edi
	 mov	fs:[edi], al
blit55:
	shr	edx, 1			;convert word count to dword count
	jnc	blit60			;jump if # words to move is even
	 dec	esi
	 dec	esi
	 mov	ax, fs:[esi]		;move word from esi to edi
	 dec	edi
	 dec	edi
	 mov	fs:[edi], ax
blit60:
	test	edx, edx		;all done?
	je	blit90

blit65:	sub	esi, 4			;copy in descending order
	sub	edi, 4
	mov	eax, fs:[esi]		;move dword from esi to edi
	dec	edx			;(funny order tries to keep pipes full)
	mov	fs:[edi], eax
	jne	blit65			;loop for all dwords
blit90:
	pop	edi			;restore registers
	pop	esi
	ret	4*4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to output a value to a port.
; POUT(VAL, PORT, WORDSIZE);
;
intr64:	mov	edx, [esp]+4		;get port number
	test	eax, eax		;test size
	mov	eax, [esp]+8		;get value
	jne	short pout1		;skip if 16-bit
	out	dx, al			;write 8-bit value
	ret	2*4			;drop args
pout1:
	out	dx, ax			;write 16-bit value
	ret	8

;-------------------------------------------------------------------------------
;Intrinsic to input a value from a port.
; VAL:= PIN(PORT, WORDSIZE)
;
intr65:	mov	edx, [esp]+4		;get port number
	test	eax, eax		;test size
	jne	short pin1		;skip if 16-bit input
	in	al, dx			;read 8-bit value (high bytes are 0)
	ret	4			;drop args
pin1:
	in	ax, dx			;read 16-bit value
	movzx	eax, ax			;zero high bytes
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to allocate low memory (<1M). The number of paragraphs to allocate
; is in tos (eax). The address of the allocated memory is returned in tos.
; SEGMENT_ADDRESS:= MALLOC(PARAGRAPHS);
; Interrupt 31h works for WinXP, but it does not work for DOS. Thus if the
; first method fails, interrupt 21h is called. (It is strange that int 21h
; does not work for Windoze. Even stranger is that if 21h is tried first then
; 31h fails.)
;
intr73:	push	ebx			;save register
	mov	ebx, eax		;get allocation size in paragraphs
	push	ebx			;save it (gets overwritten if error)
	mov	ax, 0100h
	int	31h			;int 31h does the call to real mode
	pop	ebx			;restore parargraphs       (destroys dx)
	jnc	malc30			;exit if no errors
					;Windoze version failed, try DOS version
	mov	ah, 48h			;memory allocate function
	int	21h			;call DOS
	jnc	malc30			;skip if no errors
	mov	axreg, ax		;save error conditions
	mov	bxreg, bx
	mov	al, 2			;flag memory error
	call	error			;handle error
	mov	ax, dseg		;use data segment (better than nothing)
malc30:
	movzx	eax, ax			;convert segment address to 32-bit value
	pop	ebx			;restore register
	ret

;-------------------------------------------------------------------------------
;Intrinsic to release a block of memory back to DOS. The segment address of the
; memory to release is in tos (eax).
; RELEASE(SEGMENT_ADDRESS);
;
intr74:	push	eax
	push	ebx
	push	ecx
	push	edi			;save registers

	mov	edi, offset regs_edi	;point to regs
	mov	[regs_ah-regs_edi+edi], 49h ;set function to release memory blk
	mov	[regs_es-regs_edi+edi], ax  ;segment of block to be released
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;set up to call interrupt 21h
	mov	ax, 300h		;do function 0300h
	int	31h			;do int 31h to do the call to real mode

	pop	edi
	pop	ecx
	pop	ebx
	pop	eax			;restore registers

	push	regs_flags		;get flags returned by real-mode int
	popf
	sti
	jnc	short reles1		;skip if no errors
	mov	axreg, ax		;save error conditions
	mov	al, 2			;flag memory error
	call	error			;handle error
reles1:	ret

;-------------------------------------------------------------------------------
;Routines to trap and filter control-c and control-break
;
dseg	segment	para public use32 'data'
ctcflg	dd	0			;flag: ctrl-c or ctrl-break was struck
cinflg	dw	0			;flag: our TRAPC handler is installed
int23	dd	0			;address of original ctrl-c handler
int1b	dd	0			;address of original ctrl-break handler
dseg	ends

;-------------------------------------------------------------------------------
;Intrinsic to turn on or off control-c trapping. Passing 'true' prevents
; Ctrl-C and Ctrl-Break keys from aborting a program.
; TRAPC(true);
;
intr75:	test	eax, eax		;is argument false?
	je	rstbrk			;jump if so - restore original handlers

	cmp	cinflg, 0		;is our vector already installed?
	jne	rstbk1			;exit if so

;Save then change vector address for both control-c and control-break handlers
	push	ebx
	push	ecx
	push	edi

	mov	edi, offset regs_edi
	mov	[regs_ax-regs_edi+edi], 3523h ;save address of int 23h handler
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;call interrupt 21h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode
	mov	bx, [regs_bx-regs_edi+edi]
	mov	word ptr int23, bx
	mov	bx, [regs_es-regs_edi+edi]
	mov	word ptr int23+2, bx

	mov	[regs_ax-regs_edi+edi], 351Bh ;save address of int 1Bh handler
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;call interrupt 21h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode
	mov	bx, [regs_bx-regs_edi+edi]
	mov	word ptr int1b, bx
	mov	bx, [regs_es-regs_edi+edi]
	mov	word ptr int1b+2, bx

	mov	[regs_ax-regs_edi+edi], 2523h ;set int 23h vector
	mov	edx, offset ctrbrk	;load far pointer into break handler
	mov	[regs_dx-regs_edi+edi], dx
	mov	ax, seg ctrbrk
	mov	[regs_ds-regs_edi+edi], ax
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;call interrupt 21h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode

	mov	[regs_ax-regs_edi+edi], 251Bh ;set int 1Bh vector
	mov	edx, offset ctrbrk	;load far pointer into break handler
	mov	[regs_dx-regs_edi+edi], dx
	mov	ax, seg ctrbrk
	mov	[regs_ds-regs_edi+edi], ax
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;call interrupt 21h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode

	pop	edi
	pop	ecx
	pop	ebx

	or	cinflg, -1		;indicate our vectors are installed
	ret

;Restore original ctrl-c and ctrl-break vectors
rstbrk:	cmp	cinflg, 0		;is our vector installed?
	je	rstbk1			;exit if not

;Restore original vectors
	push	ebx
	push	ecx
	push	edi

	mov	edi, offset regs_edi
	mov	[regs_ax-regs_edi+edi], 2523h ;set int 23h vector
	mov	dx, word ptr int23
	mov	[regs_dx-regs_edi+edi], dx
	mov	dx, word ptr int23+2
	mov	[regs_ds-regs_edi+edi], dx
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;call interrupt 21h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode

	mov	[regs_ax-regs_edi+edi], 251Bh ;set int 1Bh vector
	mov	dx, word ptr int1b
	mov	[regs_dx-regs_edi+edi], dx
	mov	dx, word ptr int1b+2
	mov	[regs_ds-regs_edi+edi], dx
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;call interrupt 21h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode

	pop	edi
	pop	ecx
	pop	ebx

rstbk1:	ret

;-------------------------------------------------------------------------------
;Intrinsic to return 'true' if ctrl-c or ctrl-break has been struck
; BOOLEAN:= TESTC;
;
intr76:	mov	eax, ctcflg		;get flag
	mov	ctcflg, 0		;clear flag
	ret

;-------------------------------------------------------------------------------
;EQUIP
;
intr77:
	mov	al, 5			;flag bad intrinsic
	jmp	error

;-------------------------------------------------------------------------------
;SHRINK
;
intr78:
	ret

;-------------------------------------------------------------------------------
;Intrinsic to turn on or off IRQs
; IRQ(false);
;
intr80:	test	eax, eax		;is it false?
	je	short irq10		;jump if so
	sti				;enable interrupts
	ret
irq10:
	cli				;disable interrupts
	ret

;-------------------------------------------------------------------------------
;Intrinsic to get the current time (in microseconds)
; Windows is very slow at turning interrupts off and on so this codes avoids it
; TIME:= GETTIME;
;
intr82:	push	ecx			;preserve register

	mov	cx, fs:[46Ch]		;get BIOS timer

	mov	al, 0			;latch count in timer 0
	out	43h, al
	in	al, 40h			;read low byte
	mov	ah, al
	in	al, 40h			;read high byte

	mov	dx, fs:[46Ch]		;get BIOS timer again
	cmp	dx, cx			;skip if there is no change; otherwise a
	je	gtim10			; timer-interrupt occurred, so...
	mov	al, 0			;reread timer 0
	out	43h, al
	in	al, 40h			;read in low byte
	mov	ah, al
	in	al, 40h			;read in high byte
gtim10:
	xchg	al, ah
	not	eax			;re-invert the bits (to count upward)
	shl	eax, 16			;hi, lo, 0, 0
	mov	ax, dx			;hi, lo, HI, LO
	rol	eax, 16			;HI, LO, hi, lo

;convert counts to microseconds by *838/1000 *2^32 and use high dword
	mov	ecx, 3599182594
	mul	ecx			;edx:eax <- eax*ecx
	mov	eax, edx		;return high dword in eax

	pop	ecx			;restore register
	ret

;Initialize (open) the (8253) system timer
;The system timer (timer 0) is normally set to mode 3, as defined by IBM.
; Some BIOSes, Win3.1 and WinXP have a bug that instead sets this timer to
; mode 2. This does not change the interrupt rate, but it does change the
; rate that the internal counter decrements. Mode 3 decrements by 2 (and
; makes two pases), and mode 2 decrements by 1. Since the internal counter
; is read by this code, it is essential that its rate be correct. Of course
; WinXP (in its infinite wisdom) does not allow reprogramming the mode, so
; the non-standard (but more logical) mode 2 is used here.

timerop:push	eax
	mov	al, 34h			;set timer 0 to mode 2
	out	43h, al
	mov	al, 0FFh		;set 16-bit countdown timer to maximum
	out	40h, al			; (low byte first)
	out	40h, al
	pop	eax
	ret

;Restore normal timer mode (3) (i.e: close it)

timercl:push	eax
	mov	al, 36h			;set timer 0 to mode 3
	out	43h, al
	mov	al, 0FFh		;set 16-bit countdown timer to maximum
	out	40h, al			; (low byte first)
	out	40h, al
	pop	eax
	ret

;-------------------------------------------------------------------------------
;Intrinsic to set backup flag so that last input character is reread.
; BACKUP;
;
intr83:	mov	backFl, 1
	ret

;===============================================================================
;                         FLOATING POINT ROUTINES
;===============================================================================
;
;All floating point variables are in the IEEE long real format, which is
; 64 bits (eight bytes) long. Reals are always stored in normal memory order
; (except when on the stack and being passed as arguments to procedures).
; Normal order is least significant byte of the mantissa in the lowest address
; and the exponent in the highest address. Real arrays require that an address
; be placed inside a 64-bit real. The format is least significant byte of the
; address first. The four most significant bytes of the real are not used.
;
;		   0     1     2     3
;		+-----+-----+-----+-----+
;		| LSB |     |     | MSB |
;		+-----+-----+-----+-----+
;		 INTEGERS AND ADDRESSES
;
;
;		   0     1     2     3     4     5     6     7
;		+-----+-----+-----+-----+-----+-----+-----+-----+
;		| LSB |     |     |     |     |     | MSB | EXP |
;		+-----+-----+-----+-----+-----+-----+-----+-----+
;				      REALS
;
;
;		   0     1     2     3     4     5     6     7
;		+-----+-----+-----+-----+-----+-----+-----+-----+
;		| LSB |     |     | MSB |  0  |  0  |  FF |  3  |
;		+-----+-----+-----+-----+-----+-----+-----+-----+
;				ADDRESSES IN REALS
;
;
dseg	segment	para public use32 'data'
	align	8
fptemp	dd	0, 0			;very temporary memory location for FPU
					;second double word is used to make
					; array addresses (3FF00000h)
form1	dw	5			;arguments for XPL format intrinsic
form2	dw	5			; format(form1, form2);
dseg	ends

;===============================================================================
;                        FLOATING POINT INTRINSICS
;===============================================================================
;Intrinsic to reserve heap space for real numbers. This reserves tos*8
; bytes of heap space and returns the starting 32-bit address converted
; to a real on the 387 stack.
; ARRAY:= RLRES(5);
;
intr46:
	if	flagopt
	 mov	eax, [esp]+4		;real intrinsic so tos is on the stack
	 shl	eax, 3			;multiply by 8 (rlsize)
	 mov	edx, edi		;get address of reserved space
	 mov	fptemp, edx		;combine it with 1.0 in fptemp
	 mov	fptemp+4, 3FF00000h	; to form floating point base address
	 fld	qword ptr fptemp	; and push it onto mathcoprocessor stack
	 add	edi, eax		;add number of bytes to heap pointer
	 mov	eax, edi		;heap overflow?
	else
	 mov	eax, [esp]+4		;real intrinsic so tos is on the stack
	 shl	eax, 3			;multiply by 8 (rlsize)
	 mov	edx, hp			;get address of reserved space
	 mov	fptemp, edx		;combine it with 1.0 in fptemp
	 mov	fptemp+4, 3FF00000h	; to form floating point base address
	 fld	qword ptr fptemp	; and push it onto mathcoprocessor stack
	 add	hp, eax			;add number of bytes to heap pointer
	 mov	eax, hp			;heap overflow?
	endif
	cmp	eax, heaphi
	jb	short flres1		;jump if not
	mov	al, 2			;RUN-TIME ERROR 2: OUT OF MEMORY
	call	error			;handle error
flres1:
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to input a floating-point number from the specified device.
; REAL:= RLIN(DEVICE);
;
intr47:	mov	eax, [esp]+4		;get device number
	mov	nowdev, ax
	call	finp			;input a real into st(0)
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to output the floating point number.
; RLOUT(DEV, REAL);
;
intr48:	mov	eax, [esp]+4		;get device number
	mov	nowdev, ax
	call	fout			;output real
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to convert the integer on tos to a floating point number.
; Note that this is compiled as a real intrinsic with tos on the stack.
; REAL:= FLOAT(INTEGER);
;
intr49:	fild	dword ptr [esp]+4
	ret	4

;-------------------------------------------------------------------------------
;Intrinsic to convert real on tos to nearest integer.
; Note that this is compiled as an integer intrinsic with tos in eax.
; INTEGER:= FIX(REAL);
;
intr50:	pop	edx			;get return address
	push	eax			;keep stack balanced

	fistp	fptemp			;round to integer and store
	fstsw	ax			;get coprocessor status
	test	al, 01Dh		;test for errors
	je	short fixfn1		;skip if no error
	mov	al, 9			;get error number
	call	error			;handle the error
fixfn1:
	mov	eax, fptemp		;return integer in eax

	push	edx			;restore return address
	ret

;-------------------------------------------------------------------------------
;Intrinsic to take the absolute value of the real number on tos.
; REAL:= RLABS(REAL);
;
intr51:	fabs
	ret

;-------------------------------------------------------------------------------
;Intrinsic to set the format of floating point number output for RLOUT.
; FORMAT(F1, F2);
;  F1 > 0:  decimal notation, with F1 digits in front of decimal point
;  F1 = 0:  scientific notation		  1.23E+023
;  F1 < 0:  engineering notation	123.0E+021
;  F2:  number of digits after the decimal point
;
intr52:	mov	eax, [esp]+4
	mov	form2, ax
	mov	eax, [esp]+8
	mov	form1, ax
	ret	8

;-------------------------------------------------------------------------------
;Intrinsic to return the square root of top of stack.
; TOS:= SQRT(TOS);
;
intr53:	fsqrt				;do sqrt
	fstsw	ax			;get coprocessor status
	test	al, 01Dh		;test for errors
	jne	short flsqt1		;jump if error
	ret
flsqt1:
	mov	al, 10			;flag the error
	jmp	error			;(pjmp)

;-------------------------------------------------------------------------------
;Intrinsic to return the natural logarithm (base e) of top of stack.
; TOS:= LN(TOS);
;
intr54:	fldln2				;get ln(2)
	fxch
	fyl2x				;ln(2) *log2(st) = ln(st)

	fstsw	ax			;get the status
	test	al, 01Dh		;errors?
	jne	short flln1		;skip if error
	ret
flln1:
	mov	al, 11			;flag the error
	jmp	error			;(pjmp)

;-------------------------------------------------------------------------------
;Intrinsic to return the exponential function of top of stack.
; TOS:= EXP(TOS);
; e^x = 2^z; where z = (x * log2(e))
; convert z to: integer + fraction
; then: 2^(fraction) and add integer to exponent
;
intr55:	fldl2e				;x * log2(e)
	fmul
	fld	st			;duplicate tos
	frndint				;round to integer
	fsub	st(1), st		;st(1):= fraction(st(1))
	fxch				;put fraction in st
	f2xm1				;2^x - 1
	fld1				;add 1.0
	fadd
	fscale				;add in exponent
	fstp	st(1)

	fstsw	ax			;get the status
	test	al, 01Ch		;test for errors
	jne	short flexp1		;skip if error
	ret
flexp1:
	mov	al, 12			;flag the error
	jmp	error			;(pjmp)

;-------------------------------------------------------------------------------
;Intrinsic to return the sine of top of stack.
; TOS:= SIN(TOS);
;
intr56:	fsin				;take the sine
	fstsw	ax			;get the status
	test	ax, 041Ch		;test for errors
	jne	short sinerr		;jump if error
	ret

sinerr:	mov	eax, [esp]
	mov	erraddr, eax
	mov	al, 7			;must be overflow error
	jmp	error			;(pjmp)

;-------------------------------------------------------------------------------
;Intrinsic to return the arctan of y/x (nos/tos).
; TOS:= ATAN2(Y, X);
;
intr57:	fpatan				;do atan2 and pop
	fxam				;look at the number type
	fstsw	ax			;get the status

	test	ah, 01h			;result = special?
	jne	short flat1		;jump if error
	ret
flat1:
	mov	al, 14			;flag the error
	jmp	error			;(pjmp)

;-------------------------------------------------------------------------------
;Intrinsic to return the x modulo y.
; TOS:= MOD(X, Y);
;
intr58:	fxch

remlop:	fprem				;do partial remainder
	fstsw	ax			;get the status
	sahf				;put into cpu flags
	jp	short remlop		;loop until result is in range

	fxch
	fstp	st			;discard top of stack
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return the common logarithm (base 10) of top of stack.
; TOS:= LOG(TOS);
;
intr59:	fldlg2				;get log(2)
	fxch
	fyl2x				;st:= st(1) * log2(st)

	fstsw	ax			;get the status
	test	al, 01Dh		;test for errors
	jne	short fllog1		;jump if error
	ret
fllog1:
	mov	al, 11			;flag the error
	jmp	error			;(pjmp)

;-------------------------------------------------------------------------------
;Intrinsic to return the cosine of top of stack.
; TOS:= COS(TOS);
;
intr60:	fcos				;take the cosine
	fstsw	ax			;get the status
	test	ax, 041Ch		;test for errors
	jne	short sinerr		;jump if error
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return the tangent of top of stack.
; TOS:= TAN(TOS);
;
intr61:	fptan				;take the tangent
	fstsw	ax			;get the status
	test	ax, 041Ch		;errors?
	fstp	st			;drop tos (1.0)
	jne	short sinerr		;jump if so
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return the arcsine of top of stack by doing:
; atan(x/sqrt(1-x^2))
; TOS:= ASIN(TOS);
;
intr62:	fld	st			;duplicate tos
	fmul	st, st			;x^2
	fld1				;get 1.0
	fsubr				;1 - x^2
	fsqrt				;sqrt(1-x^2)
	fpatan				;take atan2

	fstsw	ax			;get the status
	test	al, 01Ch		;errors?
	jne	short sinerr		;jump if so
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return the arccosine of top of stack by doing:
; -asin(x) + pi/2
; TOS:=  ACOS(TOS);
;
dseg	segment	para public use32 'data'
piover2	dq	1.57079632679489661923
dseg	ends

intr63:	fld	st			;duplicate tos
	fmul	st, st			;x^2
	fld1				;get 1.0
	fsubr				;1 - x^2
	fsqrt				;sqrt(1-x^2)
	fpatan				;take atan2
	fchs				;complement sign
	fadd	piover2

	fstsw	ax			;get the status
	test	al, 01Ch		;errors?
	jne	sinerr			;jump if so
	ret

;-------------------------------------------------------------------------------
;Routine to test for and initialize the 387 math coprocessor.
; ax and si are destroyed.
;
dseg	segment	para public use32 'data'
mes387	db	"This program requires a math coprocessor.", cr, lf, tsp
dseg	ends

tst387:	fninit				;initialize using non-wait form
	mov	esi, offset fptemp	;point to temporary
	mov	word ptr [si], 5a5ah	;set fptemp to non-zero value
	fnstsw	[si]			;get processor status (non-wait)
	cmp	byte ptr [si], 0	;status is 0 if there is a coprocessor
	jne	short no387		;jump if no coprocessor

;Can we access the control register?
	fnstcw	[si]			;get control word
	mov	ax, [si]
	and	ax, 103Fh		;test selected bits
	cmp	ax, 3Fh			;check that ones and zeros are correct
	jne	short no387		;jump if no coprocessor

;Now check for 87 vs 287 vs 387
	fld1				;form infinity (one divided by zero)
	fldz
	fdiv
	fld	st			;duplicate tos
	fchs				;make tos negative infinity
	fcompp				;pos and neg infinity not equal on 387
	fstsw	[si]			;get status in cpu flags
	mov	ax, [si]
	sahf
	je	short no387		;jump if 87 or 287

;Here if 387 is present
	finit				;reinitialize
	ret

;Here if 387 is not present
no387:	lea	si, mes387		;display error message
	call	text
	jmp	start90			;exit back to operating system

;===============================================================================
;			FLOATING POINT I/O ROUTINES
;===============================================================================
;
dseg	segment	para public use32 'data'
	align	8
tencon	dq	10.0
kilcon	dq	1000.0
hlfcon	dq	0.5
onecon	dq	1.0
dseg	ends

;-------------------------------------------------------------------------------
;Input a floating-point number in long real format and return it in st.
; Register usage:
;	ax = character input
;	cl = flag: number is negative
;	ch = flag: exponent is negative
;	bx = exponent
;	edx = used to multiply by 10
;
finp:	push	eax
	push	ebx
	push	ecx
	push	edx			;save registers

	xor	ecx, ecx		;assume positive number & exponent
	xor	ebx, ebx		;zero exponent
	fldz				;zero st

;Loop to get leading characters
finled:	call	getdig			;get a character
	jc	short finint		;jump if it is a digit
	cmp	al, '-'			;minus sign?
	jne	short finld1		;skip if not
	not	cl			;set negative number flag
	jmp	short finled		;loop--get next character

finld1:	cmp	al, eof			;exit if end of file
	je	finxt1
	cmp	al, '.'			;is it a decimal point?
	jne	short finled		;loop if not
	jmp	short finfrc		;else go get fractional part

;Read the integer part
finint:	call	addin			;add in the digit to value in st(0)
	call	getdig			;get next character
	jc	short finint		;loop if it is a digit

	cmp	al, '.'			;decimal point?
	jne	short finend		;jump if not--end of number

;Read the fractional part following the decimal point
finfrc:	call	getdig			;get a digit
	jnc	short finend		;exit loop if not a digit
	call	addin			;add in the digit
	dec	ebx			;decremnent exponent
	jmp	short finfrc		;loop until not digit

;Here on end of number, handle exponent if any
finend:	cmp	al, 'E'			;exponent?
	je	short finexp		;then handle it
	cmp	al, 'e'			;also check for lowercase
	jne	short finfin		;jump if no exponent--go finish up

finexp:	call	getdig			;get a character
	jc	short finex0		;skip if digit
	cmp	al, eof			;exit if end of file
	je	short finxt1
	cmp	al, '-'			;negative?
	jne	short finexp		;loop if not
	not	ch			;set flag for negative exponent
	jmp	short finexp		;loop until digit

;Here when we've got a digit in al, convert digits to binary in edx
finex0:	xor	edx, edx		;initialize accumulator (edx)
finex1:	lea	edx, [edx+edx*4]	;multiply edx by 10 the fast and
	add	edx, edx		; tricky way
	add	edx, eax		;add in digit
	call	getdig			;get next character
	jc	short finex1		;loop if it is a digit

	test	ch, ch			;negative exponent?
	je	short finex2		;skip if not
	neg	edx			;make it negative
finex2:	add	ebx, edx		;combine with other exponent

;Now adjust number according to exponent
finfin:	test	ebx, ebx		;test exponent
	je	short finext		;zero? then exit
	js	short fexneg		;negative?

;Here if exponent is positive
fexps1:	fmul	tencon			;multiply by ten
	dec	ebx
	jne	short fexps1		;loop
	jmp	short finext

;Here if exponent is negative
fexneg:	fdiv	tencon			;divide by 10
	inc	ebx
	jne	short fexneg		;loop

finext:	test	cl, cl			;negative number?
	je	short finxt1		;skip if not
	fchs				;make it negative
finxt1:
	pop	edx
	pop	ecx
	pop	ebx
	pop	eax			;restore registers
	ret

;-------------------------------------------------------------------------------
;Get a charater in al. If it's a number (0-9) then set carry flag and
; convert it to binary in eax.
;
getdig:	call	chin			;get a character
	cmp	al, '_'			;ignore underlines
	je	getdig
	cmp	al, '0'			;test for number
	jb	short notdig		;skip if not
	cmp	al, '9'			;test again
	ja	short notdig		;skip if not
	and	eax, 0Fh		;make binary number
	stc				;flag that it's a number
	ret
notdig:
	clc				;flag not a number
	ret

;-------------------------------------------------------------------------------
;Multiply st by 10 and add the digit in al. ax is destroyed.
;
addin:	fmul	qword ptr tencon	;multiply st(0) by ten
	mov	ah, 0			;make sure high byte is zero
	mov	word ptr fptemp, ax	;load and float the digit
	fiadd	word ptr fptemp		;add in digit
	ret

;-------------------------------------------------------------------------------
;Output floating point number in st(0).
; Inputs: form1 and form2 which control the format.
; Register usage:
;	eax = scratch
;	bx = exponent
;	ch = flags for number to output
;	cx = number of digits before decimal point
;	dx = maxium digits we can display
;	si = sign of number to output
;	di = number of integer digits
;	bp = coprocessor status of number to output
;
sigfig	equ	15			;number of digits of precision

fout:	pusha				;save registers

	fxam				;test for strange numbers
	fstsw	ax			;get status
	sahf
	jc	short fotspc		;carry = c0; if set, number is special
	jne	short fptgdn		;zero = c3; if set, number isn't special
	jnp	short fptgdn		;parity = c2; if set, number is special

;Here to handle special values, like infinity, not a number (nan), etc.
fotspc:	mov	al, '?'			;just display '???'
	call	chout
	call	chout
	call	chout
	jmp	fout99			;exit

fptgdn:	ftst				;test the sign of the number
	fstsw	ax			;get the status
	mov	ch, ah			;save for later
	and	ah, 01h			;c0 = sign
	mov	si, ax			;save a copy
	fabs				;force number to be positive

	mov	ax, form1		;determine format type
	test	ax, ax
	je	short scinot		;=0 = scientific notation
	jl	short engnot		;<0 = engineering notation
					;>0 = standard decimal notation
	mov	cx, ax			;number of digits before decimal point
	call	fixpnt			;do decimal notation output
	jmp	fout99			;exit

;Output number in scientific notation
scinot:	xor	bx, bx			;zero exponent
	mov	ah, ch			;restore saved status
	sahf				;and set cpu flags with it
	je	short scint4		;skip if number is zero

;Multiply number by ten until it's not a fraction
scint1:	fcom	onecon			;is number < one ?
	fstsw	ax
	sahf
	jae	short scint2		;exit if >= one
	fmul	tencon			;multiply by ten
	dec	bx			;decrement exponent
	jmp	short scint1		;loop

;Divide by ten until it's less than ten
scint2:	fcom	tencon			;is number >= 10 ?
	fstsw	ax
	sahf
	jb	short scint4		;exit < 10
	fdiv	tencon			;divide by ten
	inc	bx			;increment exponent
	jmp	short scint2		;loop

scint4:	mov	cx, 2			;set 2 places before decimal point
	jmp	short expout		;output exponent

;Output the number in engineering notation
engnot:	xor	bx, bx			;zero exponent
	mov	ah, ch			;restore status
	sahf				;and set cpu flag with it
	je	short engnt4		;skip if zero

;Multiply number by a thousand until it's not a fraction
engnt1:	fcom	onecon			;is number < one ?
	fstsw	ax
	sahf
	jae	short engnt2		;exit when >= one
	fmul	kilcon			;multiply by one thousand
	sub	bx, 3			;decrement exponent
	jmp	short engnt1		;loop

;Divide by a thousand until it's less than a thousand
engnt2:	fcom	kilcon			;is number >= 1000 ?
	fstsw	ax
	sahf
	jb	short engnt4		;exit when < 1000
	fdiv	kilcon			;divide by 1000
	add	bx, 3			;increment exponent
	jmp	short engnt2		;loop

engnt4:	mov	cx, 4			;set 4 places before decimal point

;Output exponent created by floating point adjustments
expout:	call	fixpnt			;output fix point part (123.45)
	mov	al, 'E'			;output the 'E'
	call	chout

	mov	al, '+'			;get a plus sign
	test	bx, bx			;negative exponent?
	jge	short expot1		;skip if not
	neg	bx			;make exponent positive
	mov	al, '-'			;get a minus sign
expot1:	call	chout			;output the sign

;Output leading zeros of exponent, if any (i.e: 123.45E+003)
	cmp	bx, 100			;no leading zero if >= 100
	jge	short expot3
	cmp	bx, 10			;one zero if >= 10
	jge	short expot2
	mov	al, '0'			;do leading zeros
	call	chout
expot2:	mov	al, '0'
	call	chout
expot3:	movsx	eax, bx			;output the exponent
	call	intout
fout99:
	fstp	st(0)			;pop number off stack
	popa				;restore registers
	ret

;-------------------------------------------------------------------------------
;Routine to output the floating-point number in st(0) in standard decimal
; notation. The number of digits before the decimal point is specified in
; "cx" and the number of digits after is in "form2".
;
fixpnt:	fcom	onecon			;test for fraction
	fstsw	ax			;and save resulting flags
	mov	bp, ax

	ftst				;test the number
	fstsw	ax			;get the status
	sahf				;is number zero?
	jne	short fixpt0		;skip if not
	xor	di, di			;no integer digits
	jmp	short fixpt1		;go output the zero
fixpt0:
	call	fraciz			;convert to a fraction
	call	fround			;round the number
fixpt1:
;Calculate and output the number of leading blanks, and output the sign 
	sub	cx, di
	mov	ax, bp			;get number status
	sahf				;is number <1?
	jae	short lblnk1		;skip if not
	dec	cx			;count the leading zero
lblnk1:	test	si, 0100h		;test the sign of the number
	je	short lblnk2		;skip if positive
	dec	cx			;else count the sign
lblnk2:
	test	cx, cx			;deal zero or negative
	jle	short lblnk4		;skip no blanks
lblnk3:	mov	al, ' '			;output a blank
	call	chout
	dec	cx			;loop
	jne	short lblnk3

;Now test and handle the sign
lblnk4:	test	si, 0100h		;test the sign
	je	short lblnk5		;skip if positive
	mov	al, '-'			;get minus sign
	call	chout			;output it

;Now output leading zero if number is a fraction
lblnk5:	mov	ax, bp			;get number status
	sahf				;is number < 1
	jae	short lblnk6		;skip if not
	mov	al, '0'			;output leading zero
	call	chout
lblnk6:
;Output the actual digits of the number
	mov	dx, sigfig		;set maximum to output
	mov	cx, di			;set integer part
	call	digout			;output integer part
	cmp	form2, 0		;any digits after decimal point?
	jle	short fixpt2		;jump if not--exit

	mov	al, '.'			;output the decimal point
	call	chout
	mov	cx, form2		;set number of digits after decimal pt.
	call	digout			;display fractional part
fixpt2:	ret

;-------------------------------------------------------------------------------
;Round the number by adding 0.5 * 10 ^ -(di+form2)
;
fround:	mov	ax, form2		;calculate total digits to display
	add	ax, di

	cmp	ax, sigfig		;out of range?
	jbe	short frond1		;skip if not
	mov	ax, sigfig		;else use sigfigs
frond1:
	cwde
	fadd	qword ptr rndtab[eax*8]	;add in rounding factor
	fcom	onecon			;overflow? (st(0)>=1)
	fstsw	ax
	sahf
	jae	short frond2		;skip if so
	ret

;Handle rounding overflow by eliminating the leading zero and adjust the
; number back to a fraction
frond2:	xor	bp, bp			;flag no leading zero
	jmp	short fracz2		;adjust back below 1

;-------------------------------------------------------------------------------
;Routine to divide st(0) by ten until it is a fraction
fraciz:	xor	di, di			;zero counter
fracz1:	fcom	onecon			;st(0) <= one?
	fstsw	ax
	sahf
	jb	short frazxt		;then exit
fracz2:	fdiv	tencon			;divide by 10
	inc	di			;count integer digits
	jmp	short fracz1		;loop
frazxt:	ret

dseg	segment	para public use32 'data'
	align	8
rndtab	dq	5.0e-1			;rounding constants
	dq	5.0e-2
	dq	5.0e-3
	dq	5.0e-4
	dq	5.0e-5
	dq	5.0e-6
	dq	5.0e-7
	dq	5.0e-8
	dq	5.0e-9
	dq	5.0e-10
	dq	5.0e-11
	dq	5.0e-12
	dq	5.0e-13
	dq	5.0e-14
	dq	5.0e-15
	dq	5.0e-16
dseg	ends

;-------------------------------------------------------------------------------
;Output a string of floating point digits
; Number of digits to output is in cx, which is destroyed
; If precision is exceeded, output a zeroes
;
digout:	test	cx, cx			;exit if cx=0
	je	short digot9

digot1:	test	dx, dx			;more displayable digits?
	je	short zerout		;jump if not--output a zero
	fmul	tencon			;multiply st(0) by 10

	fld	st(0)			;fix to a nibble, dupicate st(0)
	fsub	hlfcon			;subtract .5
	fistp	word ptr fptemp		;store result

	mov	ax, word ptr fptemp	;save digit
	add	al, '0'			;convert to ASCII
	cmp	al, '9'
	jle	short do10
	mov	al, '9'
do10:	call	chout			;display it

	fild	word ptr fptemp		;load and float the digit
	fsub				;subrtract off this digit
	dec	dx			;count digit
	jmp	short digot2

zerout:	mov	al, '0'			;output a zero
	call	chout
digot2:
	dec	cx			;loop
	jne	short digot1

digot9:	ret

;===============================================================================
;			SOUND AND GRAPHIC ROUTINES
;===============================================================================
;Intrinsic to generate sound using the 8253 chip.
; SOUND(VOLUME, DURATION, FREQUENCY);
; If volume = 0 then no sound, otherwise sound at full volume.
; bx = duration of the sound is SLENG /18.2 in seconds
; cx = frequency is 1.19 million /SFREQ
;
portb	equ	61h			;B port
latch2	equ	42h

intr39:	push	bx			;save registers (add +4 to esp offsets)
	push	cx

	mov	cx, ax			;get frequency
	mov	bx, [esp]+4+4		;get duration

	cmp	dword ptr [esp]+8+4, 0	;get volume 
	je	short sound2		;jump if no sound--just delay

	cli
	in	al, portb		;get port value
	jmp	short $+2		;delay
	or	al, 03h			;enable speaker and timer
	out	portb, al		;beware of interrupts
	sti

	mov	al, 0B6h		;set channel 2 for mode 3
	out	43h, al			;set command register

;Start the note
	mov	ax, cx			;get frequency
	out	latch2, al
	jmp	short $+2		;delay
	mov	al, ah
	out	latch2, al

sound2:	mov	ah, 0			;get tick count in cx:dx
	int	1ah
	add	bx, dx			;add to the length

sound5:	mov	ah, 0			;get tick count in cx:dx
	int	1ah
	test	al, al			;has midnight passed since last read?
	je	short sound9		;jump if not
	sub	bx, 0B0h		;adjust for resetting clock from
sound9:	cmp	dx, bx			; 1800B0 to 000000
	js	short sound5		;loop until done (tricky signed compare)

;Turn off speaker
	cli
	in	al, portb		;get current value
	jmp	short $+2		;delay
	and	al, 0FCh		;turn off speaker
	out	portb, al
	sti

	pop	cx			;restore registers
	pop	bx
	ret	2*4			;drop args

;===============================================================================
;                            GRAPHICS ROUTINES
;-------------------------------------------------------------------------------

dseg	segment	para public use32 'data'
	align	4
;WARNING: These values (except vxMode) are only valid for VESA modes & mode 13h
vXSize	dd	0	;width of image in pixels
vYSize	dd	0	;height of image in pixels
bytesPerPix  dd	0	;bytes per pixel (0, 1, 2, 3, or 4)
bytesPerLine dd	0	;bytes per scan line (e.g: 320, 1920, 2560, 2048)
vxMode	dw	3	;currently selected video mode
vxPage	dw	0	;64K page (window A) of video RAM mapped into A0000h
vxPageB	dw	0	;64K page (window B) of video RAM mapped into A0000h
jmpOff	dd	0	;jump table offset; selects bytes per pixel (Point)
jmpOff2	dd	0	;jump table offset; selects bytes per pixel (ReadPix)
font8	dd	0	;linear address of 8x8 font table
font14	dd	0	;linear address of 8x14 font table
vGranSh	db	0	;window posn = vxPage << (ln2(64/granularity))
;Some video displays (notably those made by ATI) do not read pixels from
; window A. Instead they read from window B.
AtiCard	db	0	;ATI flag: 1=read from window B, else read from A
dseg	ends

;The VESA graphic modes require more video RAM than can fit in the
; allotted space (A0000h-AFFFFh). Thus it is mapped into this 64K space a
; page at a time.

;-------------------------------------------------------------------------------
;Intrinsic to clear the video screen.
; CLEAR;
;
intr40:	cmp	vxMode, 7Fh		;jump if VGA, EGA, CGA etc.
	jbe	clear			;(pjmp)

	pusha				;handle VESA modes
	xor	eax, eax		;eax:= 0 (black)
	mov	edx, bytesPerLine	;number of bytes to clear
	imul	edx, vYSize
	sub	edx, 4			;start at the end

;High word of edx = the required 64K video page
clr00:	push	edx			;save index
	ror	edx, 16			;dx:= page
	cmp	dx, vxPage		;jump if page is currently selected
	je	clr10

;Select page in dx
	push	eax			;save registers
	push	ebx
	mov	ax, 4F05h		;CPU video memory window control
	xor	ebx, ebx		;window A
	mov	vxPage, dx		;record selected page

;Set window position in video memory in window granularity units. Unfortunately
; some SVGA manufacturers don't use a granularity of 64K, and this causes a lot
; of trouble (see intr45).
	push	ecx
	mov	cl, vGranSh
	shl	dx, cl
	pop	ecx

	int	10h
	pop	ebx			;restore registers
	pop	eax
clr10:
	mov	dx, 0Ah			;high word of video RAM's base address
	rol	edx, 16			;restore offset into 64K page
	mov	fs:[edx], eax		;write 4 bytes of 0

	pop	edx			;get unmodified index back
	sub	edx, 4			;move back 4 bytes
	jge	clr00			;loop until screen cleared

;Set cursor to 0,0
	mov	ah, 02h			;set cursor function
	mov	bh, 0			;VESA only uses page 0 for text
	xor	edx, edx		;set cursor to home (row=0, col=0)
	int	10h			;call BIOS

	xor	eax, eax
	mov	startX, ax
	mov	startY, ax

	popa
	ret

;-------------------------------------------------------------------------------
;Intrinsic to plot a point on the graphic screen.
; POINT(X, Y, COLOR);
; (destroys ecx)
;
intr41:	mov	ecx, [esp]+8		;save coordinate for line intrinsic
	mov	startX, cx
	mov	edx, [esp]+4
	mov	startY, dx

	cmp	bytesPerPix, 0		;jump if planar mode (<=12h, 102, 4, 6h)
	je	pt100

	cmp	ecx, vXSize		;clip to screen dimensions
	jae	pt999
	cmp	edx, vYSize
	jae	pt999

	imul	ecx, bytesPerPix	;edx:= X*bytesPerPix + Y*bytesPerLine
	imul	edx, bytesPerLine
	add	edx, ecx

;High word of edx = required 64K video page
	ror	edx, 16			;dx:= page
	cmp	dx, vxPage		;jump if point is on currently selected
	je	pt10			; page

;Select page in dx
	push	eax			;save registers
	push	ebx
	mov	ax, 4F05h		;CPU video memory window control
	xor	ebx, ebx		;window A
	mov	vxPage, dx		;record selected page

;Set window position in video memory in window granularity units. Unfortunately
; some SVGA manufacturers don't use a granularity of 64K, and this causes a lot
; of trouble (see intr45).
	push	ecx
	mov	cl, vGranSh
	shl	dx, cl
	pop	ecx

	int	10h
	pop	ebx			;restore registers
	pop	eax
pt10:
	mov	dx, 0Ah			;high word of video RAM's base address
	rol	edx, 16			;restore offset into 64K page
	jmp	[jmpOff]		;write 1, 2, 3 or 4 bytes of color

jmpBase:mov	fs:[edx], al		;write 1 byte of color
	ret	2*4			;drop args

	org	jmpBase+14
	mov	fs:[edx], ax		;write 2 bytes of color
	ret	2*4			;drop args

	org	jmpBase+14*2
;	mov	fs:[edx], ax		;write 3 bytes of color
;	shr	eax, 16			;THIS DOES NOT WORK BECAUSE 65536 IS NOT
;	mov	fs:[edx+2], al		; EVENLY DIVISIBLE BY 3
;	ret	2*4			;drop args
	jmp	pt20

	org	jmpBase+14*3
	mov	fs:[edx], eax		;write 4 bytes of color
	ret	2*4			;drop args
pt20:
	mov	cl, 3			;write 3 bytes of color (total)
pt22:	mov	fs:[edx], al		;write 1 byte of color
	inc	edx			;next address
	rol	edx, 16			;swapped format
	cmp	dl, 0Ah			;did page change?
	je	pt25			;skip if not

;Select next page
	mov	dx, vxPage
	inc	dx
	push	eax			;save registers
	push	ebx
	mov	ax, 4F05h		;CPU video memory window control
	xor	ebx, ebx		;window A
	mov	vxPage, dx		;record selected page

;Set window position in video memory in window granularity units. Unfortunately
; some SVGA manufacturers don't use a granularity of 64K, and this causes a lot
; of trouble (see intr45).
	push	ecx
	mov	cl, vGranSh
	shl	dx, cl
	pop	ecx

	int	10h
	pop	ebx			;restore registers
	pop	eax
	mov	dx, 0Ah
pt25:
	rol	edx, 16			;normal format
	shr	eax, 8			;next RGB color
	dec	cl			;loop for 3 bytes
	jne	pt22
	ret	2*4			;drop args


;Plot point for all planar modes
;Test video mode and dispatch to proper dot routine. (BIOS is so incredibly
; slow that we do the dirty work here.)
pt100:	push	ebx			;save registers
	push	ecx
	push	edi
	mov	bl, al			;save color
	mov	ax, vxMode		;get video mode
	cmp	ax, 0Ch
	jbe	ptCga			;jump if modes 0..0Ch
	cmp	ax, 12h
	jbe	ptVga			;jump if modes 0Dh..12h
	cmp	ax, 6Ah
	je	ptVga			;jump if mode 6Ah
ptCga:					;do CGA modes and planar VESA modes
	mov	al, bl			;get color
	mov	bh, fs:scrpag		;get currently displayed screen page
	mov	ah, 0Ch			;pixel write function
	int	10h			;call BIOS
	jmp	pt190			;exit

;Draw dot for planar modes (0Dh..12h, 6Ah) (Several times faster than BIOS)
ptVga:	mov	ax, fs:widloc		;get number of bytes per scan line
	mul	dx			;Y*widloc + X/8
	mov	dx, startX
	shr	dx, 3
	add	ax, dx

	mov	edi, 0A0000h		;address of video RAM
	add	di, fs:scroff		;offset to start of video buffer
	add	di, ax

	mov	dx, 03CEh		;access VGA registers
	mov	ax, 8008h		;set bit mask register
	mov	cx, startX
	and	cl, 07h
	shr	ah, cl			;80h >> (X&7)
	out	dx, ax			;(word writes are twice as fast)

	mov	ax, 1803h		;set data rotate register to 18h for xor
	test	bl, 80h			;is xor bit actually set?
	jne	pt130			;skip if so
	 mov	ah, 0			;else set for "replace" instead of xor
pt130:	out	dx, ax

	mov	ah, bl			;set/reset register:= color
	and	ax, 7F00h
	out	dx, ax

	mov	ax, 0F01h		;enable set/reset register:= 0Fh
	out	dx, ax

	xchg	al, fs:[edi]		;latch old data & modify unmasked bits

;Set VGA registers to their default values
	mov	ax, 0FF08h		;enable all 8 bits in bit mask register
	out	dx, ax
	mov	ax, 0003h		;make sure xor mode is turned off
	out	dx, ax
	mov	ax, 0001h		;disable set/reset function
	out	dx, ax
pt190:
	pop	edi			;restore critical registers
	pop	ecx
	pop	ebx			;restore registers
pt999:	ret	2*4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to draw a line on the graphic screen.
; LINE(X, Y, COLOR);
;
intr42:
;Set up color and dotDash variables
	push	eax
	shr	eax, 8			;dotDash is normally in bits 15..8
	mov	dotDash, al
	pop	eax
	push	eax
	and	eax, 000000FFh		;color is normally in bits 7..0
	mov	color, eax

	cmp	vxMode, 100h		;VESA mode?
	jb	ll10			;skip if not
	cmp	bytesPerPix, 2		;more than 8-bit color?
	jb	ll10			;skip if not

	pop	eax
	push	eax
	shr	eax, 24			;dotDash is in bits 31..24 (high byte)
	mov	dotDash, al
	pop	eax
	push	eax
	and	eax, 00FFFFFFh		;color is in bits 23..0 (24-bit color)
	mov	color, eax
ll10:
	pop	eax

	mov	eax, [esp]+4		;set coordinates
	mov	endY, ax
	mov	eax, [esp]+8
	mov	endX, ax
	call	lline			;draw the line
	mov	ax, endX		;copy start into end
	mov	startX, ax
	mov	ax, endY
	mov	startY, ax
	ret	2*4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to move to a new coordinate.
; MOVE(X, Y);
;
intr43:	mov	startY, ax		;get coordinates
	mov	eax, [esp]+4
	mov	startX, ax
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to return the color of a pixel on the graphics screen.
; COLOR:= READPIX(X, Y);
; (destroys ecx)
;
intr44:	cmp	bytesPerPix, 0		;jump if planar mode (<=12h, 102, 4, 6h)
	je	rp100

	imul	eax, bytesPerLine	;edx:= Y*bytesPerLine + X*bytesPerPix
	mov	edx, [esp]+4		;X
	imul	edx, bytesPerPix
	add	edx, eax

;High word of edx = required 64K video page
	ror	edx, 16			;dx:= page
	cmp	AtiCard, 0		;is window A readable?
	je	rp05			;skip if so
	 cmp	dx, vxPageB		;is page (window) B selected?
	 je	rp10			;skip if so
	 jmp	rp07			;else go set page
rp05:	cmp	dx, vxPage		;jump if point is on currently selected
	je	rp10			; page (window) A

;Select page in dx
rp07:	push	eax			;save registers
	push	ebx
	mov	ax, 4F05h		;CPU video memory window control
	xor	ebx, ebx		;window A
	add	bl, AtiCard		;use window B if A is not readable
	jne	rp08			;skip if ATI card
	 mov	vxPage, dx		;record selected page
	 jmp	rp09
rp08:	mov	vxPageB, dx
rp09:
;Set window position in video memory in window granularity units. Unfortunately
; some SVGA manufacturers don't use a granularity of 64K, and this causes a lot
; of trouble (see intr45).
	push	ecx
	mov	cl, vGranSh
	shl	dx, cl
	pop	ecx

	int	10h
	pop	ebx			;restore registers
	pop	eax
rp10:
	mov	dx, 0Ah			;high word of video RAM's base address
	rol	edx, 16			;restore offset into 64K page
	jmp	[jmpOff2]		;read 1, 2, 3 or 4 bytes of color
jmpBase2:
	mov	al, fs:[edx]		;read 1 byte of color
	movzx	eax, al
	ret	4			;drop args

	org	jmpBase2+14
	mov	ax, fs:[edx]		;read 2 bytes of color
	movzx	eax, ax
	ret	4			;drop args

	org	jmpBase2+14*2
;	mov	eax, fs:[edx]		;read 3 bytes of color
;	and	eax, 00FFFFFFh		;DOES NOT WORK BECAUSE 65536 IS NOT
;	ret	4			; EVENLY DIVISIBLE BY 3
	jmp	rp20

	org	jmpBase2+14*3
	mov	eax, fs:[edx]		;read 4 bytes of color
	ret	4			;drop args
rp20:
	mov	cl, 3			;read 3 bytes of color (total)
rp22:	mov	al, fs:[edx]		;read 1 byte of color (LSB first)
	inc	edx			;next address
	rol	edx, 16			;swapped format
	cmp	dl, 0Ah			;did page change?
	je	rp25			;skip if not

;Select next page
	mov	dx, vxPage		;assume window A is readable
	cmp	AtiCard, 0		;if ATI card then use vxPageB
	je	rp22a			;skip if not
	 mov	dx, vxPageB
rp22a:	inc	dx
	push	eax			;save registers
	push	ebx
	mov	ax, 4F05h		;CPU video memory window control
	xor	ebx, ebx		;window A
	add	bl, AtiCard		;use window B if A is not readable
	jne	rp23			;skip if ATI card
	 mov	vxPage, dx		;record selected page
	 jmp	rp24
rp23:	mov	vxPageB, dx
rp24:
;Set window position in video memory in window granularity units. Unfortunately
; some SVGA manufacturers don't use a granularity of 64K, and this causes a lot
; of trouble (see intr45).
	push	ecx
	mov	cl, vGranSh
	shl	dx, cl
	pop	ecx

	int	10h
	pop	ebx			;restore registers
	pop	eax
	mov	dx, 0Ah
rp25:
	rol	edx, 16			;normal format
	ror	eax, 8			;next RGB color
	dec	cl			;loop for 3 bytes
	jne	rp22
	shr	eax, 8
	ret	4			;drop args


;Use BIOS call to handle planar modes (very slow)
rp100:	push	ebx
	push	ecx			;save registers (add +8 to esp offsets)

	mov	edx, eax		;get Y position
	mov	ecx, [esp]+4+8		;get X position
	mov	bh, fs:scrpag		;get currently displayed screen page
	mov	ah, 0Dh			;pixel read function
	int	10h			;do the function
	movzx	eax, al			;zero high bytes
	pop	ecx
	pop	ebx			;restore registers
	ret	4			;drop X argument

;-------------------------------------------------------------------------------
;Intrinsic to set up a video mode.
; SETVID($12);
; Outputs: vxMode, vXSize, vYSize, vxPage, vGranSh, bytesPerLine, bytesPerPix
; These outputs are only guaranteed for VESA modes and mode 13h. BytesPerPix is
; set to zero for all planar modes (VESA and VGA).
;
intr45:	push	ebx			;save registers
	push	ecx
	push	edi

	push	eax			;save mode
	test	ah, ah			;if mode < 100h then do non-VESA call
	jne	sv10			;skip if VESA
	 int	10h			;set VGA mode
	 jmp	sv15
sv10:
	xchg	bx, ax			;bx:= mode
	mov	ax, 4F02h		;call VESA function
	int	10h
	cmp	ax, 004Fh		;jump if no error
	je	sv15
	 mov	al, 15			;error 15: unavailable video mode
	 call	error
sv15:
	pop	eax			;get mode
	test	ax, 7F00h		;VESA mode?
	jne	sv16			;skip if so
	 and	al, 7Fh			;clear VGA  "don't clear video mem" bit
sv16:	and	ax, 7FFFh		;clear VESA "don't clear video mem" bit
	mov	vxMode, ax		;record the video mode

;Select page 0 (even if not a VESA mode)
	mov	ax, 4F05h		;CPU video memory window control
	xor	ebx, ebx		;window A
	xor	edx, edx		;page 0
	mov	vxPage, dx		;record selected page (used by mode 13h)
	int	10h
	mov	vxPageB, -1		;force page selection in ReadPix routine
					; (if ATI card)
	mov	eax, 320		;initialize in case mode 13h
	mov	bytesPerLine, eax
	mov	vXSize, eax
	mov	vYSize, 200
	mov	vGranSh, bl		;vGranSh:= 0
	inc	ebx			;bytesPerPix:= 1
	cmp	vxMode, 13h		;jump if mode 13h
	je	sv17			;(short jump)
	 dec	ebx			;bytesPerPix:= 0
	 cmp	vxMode, 7Fh		;skip if not a VESA mode
sv17:	jbe	sv20

	mov	edi, offset regs_edi	;point to regs
	mov	[regs_ax-regs_edi+edi], 4F01h
	mov	ax, vxMode
	mov	[regs_cx-regs_edi+edi], ax

	mov	eax, offset modeBlk	;the offset can be greater than 64K if
	push	eax			; the XPL code has large constant arrays
	and	ax, 000Fh		;since the BIOS call can't handle an
	mov	[regs_di-regs_edi+edi], ax ; offset > 64K we must read just the
	pop	eax			; segment address
	shr	eax, 4
	add	ax, dseg
	mov	[regs_es-regs_edi+edi], ax

	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 10h			;call interrupt 10h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode

	mov	al, 0			;assume window A is readable
	test	[modeBlk+2], 02h	;is window A readable?
	jne	sv18			;skip if so
	 mov	al, 1			;window A is not readable; use window B
sv18:	mov	AtiCard, al

	xor	eax, eax
	mov	ax, word ptr [modeBlk+16] ;bytesPerLine = modeBlk(16)
	mov	bytesPerLine, eax
	mov	ax, word ptr [modeBlk+18] ;vXSize = modeBlk(18)
	mov	vXSize, eax
	mov	ax, word ptr [modeBlk+20] ;vYSize = modeBlk(20)
	mov	vYSize, eax

;vGranSh = ln2(64/granularity)
	mov	bx, word ptr [modeBlk+4] ;granularity = modeBlk(4)

	xor	eax, eax		;avoid div by 0 -- assume 64K window
	test	bx, bx			;if bx = 0 then vGranSh:= 0
	je	sv19
	 mov	ax, 64
	 div	bl			;al(q):ah(r) <- ax / bl
	 cbw				;vGranSh:= ln2(al)
	 bsr	ax, ax
sv19:	mov	vGranSh, al

	xor	ebx, ebx		;bitsPerPix = [modeBlk+25]
	mov	bl, [modeBlk+25]
	add	bl, 3			;bytesPerPix = (bitsPerPix+3)>>3
	shr	bl, 3
sv20:	mov	bytesPerPix, ebx
	dec	ebx			;jmpOff = (bytesPerPix-1)*14 + jmpBase
	imul	ebx, 14			;14 bytes per entry in intr41's jump tbl
	push	ebx
	add	ebx, offset jmpBase
	mov	jmpOff, ebx
	pop	ebx
	add	ebx, offset jmpBase2	;ditto for ReadPix intrinsic
	mov	jmpOff2, ebx

	cmp	vxMode, 13h		;skip font table stuff that's only used
	jb	sv25			; by VESA modes and mode 13h

;make sure that interrupt vector 43h points to the 8x16 font table
	mov	edi, offset regs_edi
	mov	[regs_ax-regs_edi+edi], 1124h ;get font information
	mov	[regs_bl-regs_edi+edi], 0 ;use dl to select number of rows
	mov	[regs_dl-regs_edi+edi], 30 ;number of rows of text
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 0010h		;call interrupt 10h
	mov	ax, 0300h		;int 31h function 0300h
	int	31h			;do the call to real mode

;set address of 8x8 font table
;WARNING: Some BIOSes (such as IBM's) alter the AX register (Thanks Ed!)
	mov	[regs_ax-regs_edi+edi], 1130h ;get font information
	mov	[regs_bh-regs_edi+edi], 03h   ;select 8x8 font
	mov	ax, 0300h		;int 31h function 0300h
	int	31h			;do the call to real mode
	xor	eax, eax
	mov	ax, [regs_bp-regs_edi+edi]
	mov	font8, eax
	mov	ax, [regs_es-regs_edi+edi]
	shl	eax, 4
	add	font8, eax

;set address of 8x14 font table
	mov	[regs_ax-regs_edi+edi], 1130h ;get font information
	mov	[regs_bh-regs_edi+edi], 02h   ;select 8x14 font
	mov	ax, 0300h		;int 31h function 0300h
	int	31h			;do the call to real mode
	xor	eax, eax
	mov	ax, [regs_bp-regs_edi+edi]
	mov	font14, eax
	mov	ax, [regs_es-regs_edi+edi]
	shl	eax, 4
	add	font14, eax

;Set default for Attrib intrinsic giving white characters on black background
sv25:	mov	eax, 07h		;for text modes and 4 or 8-bit color
	mov	ebx, bytesPerPix
	cmp	bl, 2
	jne	sv30
	 mov	ax, 0AD55h		;for 16 (and 15) bit color
sv30:	cmp	bl, 3
	jb	sv35
	 mov	eax, 0A0A0A0h		;for 24-bit color
sv35:	mov	wattrb, eax

	pop	edi			;restore registers
	pop	ecx
	pop	ebx
	ret

;-------------------------------------------------------------------------------
;Intrinsic to copy an image to video display memory.
;   Paint(X,  Y,  W,  H,  Image,  W2);
;         52  48  44  40  36      eax    :stack offsets, after pusha
; X,Y are the pixel coordinates of the upper-left corner of the image on screen.
; W,H are the width and height (in pixels) of the displayed image.
; Image is the address of the image array data.
; W2 is the width (in pixels) of the image array data, which can be greater than
; or equal to W. If it is greater than W then the right side of Image will be
; clipped off. If an offset is added to Image (such as Image + Y2 + X2*W2) then
; Image can be scrolled within the W-H window.
;
;This works for most video modes, including VESA, but it currently does not work
; for planar modes (i.e. modes with 16 colors--or less--$12, $102, $104, $106).
;
;In 24-bit color modes, the Image array always uses 4 bytes per pixel.
; Video hardware uses either 4 or 3 bytes per pixel.
;
;Register usage:
; eax = W2, and source modulo = (W2-W)*sourceBytesPerPix
; ebx = bytesPerPix (in the destination video memory)
; ecx = dwords per scan line counter
; edx = pointer to destination (video memory)
; esi = pointer to source (Image)
; edi = scan line (H) counter
; ebp = destination modulo = bytesPerLine - W*bytesPerPix
;
;Modulo is the amount to add to get to the start of the next scan line. It's
; (usually) equal to 0 when the entire width of the screen is used.
;
;The key feature of this intrinsic is that it copies four bytes at a time
; to video memory. A fast processor waits HUNDREDS of cycles for each write to
; video memory. The time DOUBLES if the writes are not on dword boundaries.
; (Misaligned reads from CPU memory--indeed all the rest of this code--takes
; an insignificant amount of time.)
;
pntX	equ	52			;stack offsets (after pusha;
pntY	equ	48			; beware of other pushes and pops
pntW	equ	44			; altering these offsets)
pntH	equ	40
pntI	equ	36

intr81:	pusha				;save all registers

	cmp	vxMode, 100h		;VESA mode?
	jae	pnt02			;skip if so
	cmp	vxMode, 13h		;all other (VGA) modes except mode 13h
	jne	pnt05			; are not supported
pnt02:	mov	ebx, bytesPerPix	;ebx = bytesPerPix
	test	ebx, ebx		;    = 0 ?
	jne	pnt10			;skip if not
pnt05:	 mov	al, 15			;error 15: unsupported planar mode
	 call	error
pnt190x: jmp	pnt190			;exit
pnt10:
	mov	esi, [esp+pntI]		;source:= Image

	mov	edx, [esp+pntY]		;destination offset address on screen:= 
	imul	edx, bytesPerLine	; Y*bytesPerLine + X*bytesPerPix
	mov	ecx, [esp+pntX]
	imul	ecx, ebx
	add	edx, ecx

	sub	eax, [esp+pntW]		;eax = W2, source modulo
	mov	ebp, ebx		;    = (W2-W)*sourceBytesPerPix
	cmp	ebp, 3			;sourceBytesPerPix = 1, 2 or 4 (not 3)
	jne	pnt14
	 inc	ebp
pnt14:	imul	eax, ebp

	mov	ebp, [esp+pntW]		;ebp = destination modulo
	imul	ebp, ebx		;    = bytesPerLine - W*bytesPerPix
	neg	ebp
	jge	pnt190x			;exit if width <= 0 (or planar mode)
	add	ebp, bytesPerLine

	mov	edi, [esp+pntH]		;number of scan lines

	cld				;set direction to increment mode
	cmp	ebx, 3			;jump if 3 bytes per pixel (totally
	je	pnt100			; different code is required)

;------------- 1, 2, or 4 bytes per pixel -------------
;------------- Loop for 'edi' scan lines --------------
pnt20:	mov	ecx, [esp+pntW]		;number of pixels to copy per scan line
	push	eax			;save source modulo

;Copy any pixels at the start of the scan line that are not on a dword boundary
	test	dl, 1			;jump if destination is on word boundary
	je	pnt25
	push	edx			;save destination (video) pointer
	call	setwin
	lodsb				;get byte from Image;   al <- ds:[esi++]
	mov	fs:[edx], al
	pop	edx			;restore unmodified destination pointer
	inc	edx			;point to next byte
	dec	ecx			;one less pixel to copy
pnt25:
	test	dl, 2			;jump if on dword boundary
	je	pnt35
	push	edx			;save destination (video) pointer
	call	setwin
	lodsw				;get word from Image;   ax <- ds:[esi++]
	mov	fs:[edx], ax
	pop	edx			;restore unmodified destination pointer
	add	edx, 2			;point to next word
	cmp	ebx, 2			;one or two less pixels to copy
	je	pnt32			;jump if 2 bytes per pixel
	 dec	ecx
pnt32:	dec	ecx
pnt35:
;Copy the bulk of the scan line, 4 bytes at a time
;Convert pixels per scan line in ecx to dwords per scan line
	push	ecx			;save remaining width (in pixels)
	cmp	ebx, 2
	jg	pnt42			;jump if 4 (or 3) bytes per pixel
	 je	pnt40
	  shr	ecx, 1			;1 byte/pixel; divide by 4 to get dwords
pnt40:	 shr	ecx, 1			;2 bytes/pix;  divide by 2 to get dwords
pnt42:
	test	ecx, ecx		;jump if no dwords to copy
	jle	pnt50
pnt45:	push	edx			;save destination (video) pointer
	call	setwin
	lodsd				;get dword from Image; eax <- ds:[esi++]
	mov	fs:[edx], eax	; <---	;99% of the total time in Paint is taken
					; by this single instruction (Duron 850)
	pop	edx			;restore unmodified destination pointer
	add	edx, 4			;point to next dword

	dec	cx			;loop for all dwords on scan line
	jg	pnt45			; (cx vs. ecx limits overflow errors)
pnt50:
;Copy any remaining pixels at the end of the scan line
	pop	ecx			;width in pixels (-beginning alignment)
	imul	ecx, ebx		; * bytes per pixel

	test	cl, 1			;one more byte needed?
	je	pnt55			;jump if not
	push	edx			;save destination (video) pointer
	call	setwin
	lodsb				;get byte from Image;   al <- ds:[esi++]
	mov	fs:[edx], al
	pop	edx			;restore unmodified destination pointer
	inc	edx			;point to next byte
pnt55:
	test	cl, 2			;two more bytes needed?
	je	pnt65			;jump if not
	push	edx			;save destination (video) pointer
	call	setwin
	lodsw				;get word from Image;   ax <- ds:[esi++]
	mov	fs:[edx], ax
	pop	edx			;restore unmodified destination pointer
	add	edx, 2			;point to next word
pnt65:
	pop	eax			;move to beginning of next scan line
	add	esi, eax		;add modulo to source pointer
	add	edx, ebp		;add modulo to destination pointer

	dec	di			;loop for all scan lines
	jg	pnt20			; (di vs. edi limits overflow errors)
	jmp	pnt190			;exit

pnt100:
;------------- 3 Bytes Per Pixel ---------------------
;------------- Loop for 'edi' scan lines -------------
pnt120:	imul	ecx, [esp+pntW], 3	;number of bytes to copy per scan line
	push	eax			;save source modulo

;Copy any pixels at the start of the scan line that are not on a dword boundary
pnt123:	test	dl, 3			;is destination at start of dword?
	je	pnt125			;jump if so
pnt124:	lodsb				;get byte from Image;   al <- ds:[esi++]
	test	esi, 3			;was this the unused byte? (needed after
	je	pnt124			;jump if so; get next byte   first line)
	push	edx			;store byte in video memory
	call	setwin
	mov	fs:[edx], al
	pop	edx			;restore unmodified destination pointer
	inc	edx			;point to next byte in destination
	dec	ecx			;one less byte to copy
	jmp	pnt123			;loop until dword boundary
pnt125:
;Copy the bulk of the scan line, 4 bytes at a time
	push	ecx			;save bytes to copy for end of scan line
	shr	ecx, 2			;convert number of bytes to dwords
	je	pnt160			;skip if none to copy

pnt145:	mov	bl, 4			;initialize count for 4 bytes per dword
pnt147:	lodsb				;get byte from Image;   al <- ds:[esi++]
	test	esi, 3			;was this the unused byte?
	je	pnt147			;jump if so; get next byte
	ror	eax, 8			;pack byte into dword
	dec	bl			;loop for 4 bytes
	jne	pnt147

	push	edx			;store dword into video memory
	call	setwin
	mov	fs:[edx], eax	; <---	;99% of the total time in Paint is taken
					; by this single instruction (Duron 850)
	pop	edx			;restore unmodified destination pointer
	add	edx, 4			;point to next dword in destination

	dec	ecx			;loop for all dwords on scan line
	jg	pnt145
pnt160:
;Copy any remaining pixels at the end of the scan line
	pop	ecx			;get bytes to copy
	and	ecx, 3			;see if is a fraction of a dword
	inc	ecx			;(cheap trick to test status)
pnt170:	dec	ecx			;any remaining bytes?
	je	pnt180			;jump if not; exit loop

pnt172:	lodsb				;get byte from Image;   al <- ds:[esi++]
	test	esi, 3			;was this the unused byte?
	je	pnt172			;jump if so; get next byte
	push	edx			;store byte into video memory
	call	setwin
	mov	fs:[edx], al
	pop	edx			;restore unmodified destination pointer
	inc	edx			;point to next byte in destination
	jmp	pnt170			;loop for remaining bytes
pnt180:
	pop	eax			;move to beginning of next scan line
	add	esi, eax		;add modulo to source pointer
	add	edx, ebp		;add modulo to destination pointer

	dec	edi			;loop for all scan lines
	jg	pnt120

pnt190:	popa				;restore registers
	ret	4*5			;drop 5 args (6th arg is passed in eax)

;-------------------------------------------------------------------------------
;Set the 64K video memory window in dx, so that it appears at A0000h.
; WARNING: Don't use this for reading pixels, because of ATI cards.
; Outputs edx.
;
setwin:	ror	edx, 16			;dx = required 64K video page (window)
	cmp	dx, vxPage		;jump if page is already selected
	je	setw90

	pusha
	mov	ax, 4F05h		;select video page (window) in dx
	xor	ebx, ebx		;select window A for int 10h
	mov	vxPage, dx		;record selected page
	mov	cl, vGranSh		;convert 64K page to granularity units
	shl	edx, cl			;(unfortunately not everyone uses 64K)
	int	10h
	popa
setw90:
	mov	dx, 0Ah			;address video memory and
	rol	edx, 16			; restore offset into 64K page
	ret

;-------------------------------------------------------------------------------
;Routine to draw a straight line. This uses Bresenham's algorithm and calls the
; Point intrinsic for simplicity, but it's slower than 16-bit XPL.
;
dseg	segment	para public use32 'data'
	align	4
startX	dw	0			;begining x position
endX	dw	0			;ending x position
startY	dw	0			;begining y position
endY	dw	0			;ending y position
dagIncX	dw	0			;diagonal x increment
dagIncY	dw	0			;diagonal y increment
dShort	dw	0			;shortest distance
strIncX	dw	0			;straight x increment
strIncY	dw	0			;straight y increment
strCnt	dw	0			;straight count
dagCnt	dw	0			;diagonal count
color	dd	0			;color attribute
dotDash	db	0			;high byte of color for dotted lines
	db	0			;(align)
dseg	ends

lline:	pusha				;save registers
	mov	bh, fs:scrpag		;get currently displayed screen page
	mov	cx, 1			;set initial increments
	mov	dx, cx

;Calculate vertical difference
	mov	di, endY		;subtract start from end
	sub	di, startY
	jge	short llin10		;skip if positive
	neg	dx			;change sign of increment
	neg	di			;and difference
llin10:	mov	dagIncY, dx		;save y increment

;Calculate horizontal difference
	mov	si, endX		;subtract start from end
	sub	si, startX
	jge	short llin20		;skip if positive
	neg	cx			;change sign of increment
	neg	si			;and difference
llin20:	mov	dagIncX, cx		;save x incremnt

;See which difference is greater
	cmp	si, di			;is horizonal longer than vertical
	jge	short llin30		;skip if so
	xor	cx, cx			;no x change when straight
	xchg	si, di			;put longest in cx
	jmp	short llin40		;now save them
llin30:	xor	dx, dx			;no y change when straight
llin40:	mov	dShort, di		;save value of short distance
	mov	strIncX, cx		;save increments
	mov	strIncY, dx

;Calculate adjustment factor
	mov 	ax, dShort		;get short value
	add	ax, ax			;double it
	mov	strCnt, ax		;save it
	sub	ax, si			;subtract long distance
	mov	di, ax			;use as loop counter
	sub	ax, si			;subtract long distance
	mov	dagCnt, ax		;save it

;Set up to draw the line
	mov	cx, startX		;set starting x
	mov	dx, startY		;set starting y
	inc	si			;inc long for endpoint

llin50:	dec	si			;decrement long distance
	js	short llin90		;exit if done

	rol	dotDash, 1		;are we between dots?
	jc	short llin60		;skip if so

	pusha
	xor	eax, eax
	mov	ax, cx
	push	eax
	mov	ax, dx
	push	eax
	mov	eax, color
	call	intr41			;Point(X, Y, color);
	popa
llin60:
	test	di, di			;straight segment?
	jge	short llin70		;do diagonal if not

;Draw straight line segment
	add	cx, strIncX		;set up x and y increments
	add	dx, strIncY		;for straight segments
	add	di, strCnt		;add to adjustment factor
	jmp	short llin50		;do next pixel

;Draw diagonal line segment
llin70:	add	cx, dagIncX		;set x and y increments
	add	dx, dagIncY		;for diagonal segment
	add	di, dagCnt		;adjust adjustment
	jmp	short llin50		;do next pixel

llin90:	popa				;restore registers
	ret

;===============================================================================
;		ROUTINES TO SUPPORT WINDOW-TYPE APPLICATIONS
;===============================================================================
;Intrinsic to set the attribute
; ATTRIB(0);
;
intr69:	mov	wattrb, eax		;set current attribute
	ret

dseg	segment	para public use32 'data'
wattrb	dd	07h			;attribute for current window
wmode	label	word			;window mode
	db	0
curmod	db	0			;cursor mode

;Window corners
	align	4
ulxwnd	dw	0			;current upper-left coordinates
ulywnd	dw	0
lrxwnd	dw	79			;current lower-right coordinates
lrywnd	dw	24

;Memory locations for BIOS parameters
curloc	equ	0450h			;memory loc for cursor position
modloc	equ	0449h			;memory loc for video mode
widloc	equ	044Ah			;holds screen width (in characters)
scrpag	equ	0462h			;currently displayed video page number
scroff	equ	044Eh			;offset to current video screen
rowloc	equ	0484h			;number of displayed character rows -1
dseg	ends

;-------------------------------------------------------------------------------
;Intrinsic to set up a display window for device channel 6.
; SETWIND(UPPER_LEFT_X, UPPER_LEFT_Y, LOWER_LEFT_X, LOWER_LEFT_Y, MODE, FILL)
; MODE: 0 = scroll, 1 = wrap, 2 = clip.
;
intr70:	mov	edx, [esp]+4		;get arguments
	mov	wmode, dx
	mov	edx, [esp]+8		;get corners of window
	mov	lrywnd, dx
	mov	edx, [esp]+12
	mov	lrxwnd, dx
	mov	edx, [esp]+16
	mov	ulywnd, dx
	mov	edx, [esp]+20
	mov	ulxwnd, dx

	push	ebx
	push	ecx			;save registers
	test	eax, eax		;fill screen?
	je	short setwn1		;skip if not

	mov	ax, 0700h		;set and fill window
	call	nVidia			;compensate for nVidia card bug
	call	setbkgnd
	mov	ch, byte ptr ulywnd	;set upper left corner
	mov	cl, byte ptr ulxwnd
	mov	dh, byte ptr lrywnd	;set lower right corner
	mov	dl, byte ptr lrxwnd
	int	10h			;call BIOS
setwn1:
	mov	ah, 02h			;set cursor to upper left corner
	mov	bh, fs:scrpag		;get currently displayed screen page
	mov	dl, byte ptr ulxwnd	;set cursor position
	mov	dh, byte ptr ulywnd
	int	10h			;call BIOS

	pop	ecx
	pop	ebx			;restore registers
	ret	5*4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to output a string of characters just like TEXT does except
; this allows the extended character set to be used, that is, characters
; with codes from 80h thru FEh. These strings must be terminated by a
; space character with its MSB set ($A0). This character is not sent out, 
; thus the extended character "a'" cannot be used in the string.
; RAWTEXT(DEV, STRING);
;
intr71:	mov	edx, eax		;get address of string from tos
	mov	eax, [esp]+4		;get device number
	mov	nowdev, ax
	jmp	short rawtx1		;enter loop

rawtx2:	call	chout			;output character to device
	inc	edx			;point to next character
rawtx1:	mov	al, [edx]		;get character at pointer
	cmp	al, 0A0h		;terminating character?
	jne	short rawtx2		;jump if not
	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to highlight parts of the screen with a specific attribute
; without changing the underlying text.
; HIGHLIGHT(X1, Y1, X2, Y2, ATTRIBUTE)
;
dseg	segment	para public use32 'data'
	align	4
hattrb	dw	0			;highlight attribute
x1wind	dw	0			;window corner coordinates
y1wind	dw	0
x2wind	dw	0
y2wind	dw	0

oldcur	dw	0			;old cursor type
oldcor	dw	0			;old cursor coordinate
dseg	ends

intr72:	mov	hattrb, ax		;get the coordinates and attributes
	mov	eax, [esp]+4
	mov	y2wind, ax
	mov	eax, [esp]+8
	mov	x2wind, ax
	mov	eax, [esp]+12
	mov	y1wind, ax
	mov	eax, [esp]+16
	mov	x1wind, ax

	pusha				;save registers
	xor	ecx, ecx		;clear high word

;Save old cursor and coordinate
	mov	ah, 03h			;set function
	mov	bh, fs:scrpag		;get currently displayed screen page
	int	10h			;call BIOS
	mov	oldcur, cx		;save cursor position
	mov	oldcor, dx		;save cursor type

;Turn off cursor
	mov	ah, 01h			;set function
	mov	cx, 2000h		;set no cursor
	int	10h			;call BIOS

	mov	dh, byte ptr y1wind	;get start y
	mov	cx, y2wind		;get ending y
	sub	cl, dh			;calculate size
	inc	ecx			;plus one

loop2:	push	ecx			;save y count

	mov	dl, byte ptr x1wind	;get start x
	mov	cx, x2wind		;get ending x
	sub	cl, dl			;calculate size
	inc	ecx			;plus one

loop1:	push	ecx			;save x count

;Start of inner part of loop
;Set cursor position
	mov	ah, 02h			;select cursor function
	int	10h			;call BIOS

;Read character at cursor into al (attribute in ah is ignored)
	mov	ah, 08h			;select read at cursor function
	int	10h			;call BIOS

;Rewrite with new attribute
	mov	ah, 9			;set function
	mov	bl, byte ptr hattrb	;set new attribute

	mov	cl, byte ptr fs:modloc	;get video mode
	cmp	cl, 3			;is it modes 0-3
	jbe	short hili0		;jump if so
	cmp	cl, 7			;is it MDA screen?
	je	short hili0		;jump if so
	cmp	cl, 13h			;256-color mode?
	je	short hili0		;jump if so (bkgnd=page=0=black)

	test	bl, 0F0h		;is there a background color?
	je	short hili0		;jump if not
	push	eax			;save char
	mov	al, 0DBh		;write a block in background color
	shr	bl, 4			;shift background to foreground
	mov	cx, 1			;write one byte
	int	10h			;call BIOS

	pop	eax			;get character back
	xor	bl, byte ptr hattrb	;get color for foreground
	and	bl, 00Fh		;(two xors with same val = original val)
	or	bl, 080h		;specify xor write for BIOS
hili0:
	mov	cx, 1			;write one byte
	int	10h			;call BIOS

;End of inner part of loop
;Advance inner and outer loop control
	pop	ecx			;restore count x
	inc	dl			;advance x
	dec	ecx			;and loop
	jne	short loop1

	pop	ecx			;restore y count
	inc	dh			;advance y
	dec	ecx			;and loop
	jne	short loop2

;Restore old cursor position
	mov	ah, 02h			;set BIOS function
	mov	dx, oldcor		;restore coordinate
	int	10h			;call BIOS

;Restore old cursor type
	mov	cx, oldcur		;get old cursor type
	mov	ah, 01h			;set BIOS function
	int	10h			;call BIOS

	popa				;restore registers
	ret	4*4			;drop args

;===============================================================================
;				DEVICE 6
;===============================================================================
;Reset window parameters for normal full screen
; OPENO(6);
;
reswnd:	push	eax
	push	ebx
	push	edx			;save registers

	and	wmode, 0		;set to scroll mode with moving cursor
	mov	wattrb, 07h		;set white on black attribute
	call	resfullwnd		;reset window for full screen

	mov	ah, 02h			;set cursor to upper-left corner
	mov	bh, fs:scrpag		;get currently displayed screen page
	mov	dl, byte ptr ulxwnd	;set cursor position
	mov	dh, byte ptr ulywnd
	int	010h			;call BIOS

	pop	edx
	pop	ebx
	pop	eax			;restore registers
	clc				;indicate no errors
	ret

;-------------------------------------------------------------------------------
;Reset window for full screen
;
resfullwnd:
	push	eax			;save register
	xor	eax, eax
	mov	ulxwnd, ax		;set upper-left coordinates to 0, 0
	mov	ulywnd, ax

	mov	al, fs:rowloc		;set number of character rows -1
	mov	lrywnd, ax
	cmp	ax, 11			;if BIOS value is too weird then use
	jle	short rfw10		; standard screen height (25 rows)
	cmp	ax, 127
	jle	short rfw20
rfw10:	mov	lrywnd, 24
rfw20:
	mov	ax, fs:widloc		;set number of character columns -1
	dec	eax
	mov	lrxwnd, ax
	cmp	ax, 19			;if BIOS value is too weird use standard
	jle	short rfw30		; screen width (80 columns)
	cmp	ax, 159
	jle	short rfw40
rfw30:	mov	lrxwnd, 79
rfw40:
	pop	eax			;restore register
	ret

;-------------------------------------------------------------------------------
;Input character from keyboard and echo it to device 6.
; Returns:
;	al = ASCII character
;	ah = keyboard scan code
;
wndchn:	mov	ah, 0			;set key input function
	int	016h			;call BIOS function
;fall into wndout to echo character...

;-------------------------------------------------------------------------------
;Output a character to current window.
; Set up for speed so that:
;	al=char
;	bl=video page
;	bh=video mode
;	cl=screen width
;	dx=cursor position
;	si=index to cursor
;
wndout:	cmp	vxMode, 100h		;jump if VESA mode
	jae	vesaout
	pusha				;save registers

	mov	bl, fs:scrpag		;get video page
	mov	bh, 0			;zero high nibble
	add	ebx, ebx		;make into index for word table
	mov	esi, ebx		;put in index reg
	mov	dx, fs:curloc[si]	;get cursor position
	mov	bh, fs:scrpag		;set up for DOS calls
	mov	bl, byte ptr fs:modloc	;get video mode
	mov	cx, fs:widloc		;get screen width
	call	wndmod			;decode window mode

	popa				;restore registers
	clc				;indicate no errors
	ret

;-------------------------------------------------------------------------------
;Handle all the different windowing modes
;
wndmod:	cmp	byte ptr wmode, 0	;scroll mode?
	je	short scrmod		;jump if so
	cmp	byte ptr wmode, 1	;wrap around mode?
	je	short wrpmod		;jump if so

;Here to handle clipped mode
	cmp	al, cr			;carriage return?
	je	wndret			;jump if so--handle return
	cmp	al, lf			;line feed?
	jne	short clpmd1		;jump if not

	cmp	dh, byte ptr lrywnd	;out of window?
	jae	short clpext		;skip if so
	inc	dh			;advance line pointer
	jmp	wndext			;then exit

clpext:	ret				;then just exit

;Here to output the character
clpmd1:	cmp	dl, byte ptr ulxwnd	;cursor out of window?
	jb	short clpext
	cmp	dl, byte ptr lrxwnd
	ja	short clpext
	cmp	dh, byte ptr ulywnd
	jb	short clpext
	cmp	dh, byte ptr lrywnd
	ja	short clpext

;Now write the character and attribute to the screen
	call	scnwrt			;write to screen
	jmp	short wndext		;and exit

;Here to handle wrap-around mode windowing
wrpmod:	cmp	al, cr			;carriage return?
	je	short wndret		;then handle it
	cmp	al, lf			;line feed?
	jne	short wrpmd1		;skip if not

	inc	dh			;advance line pointer
	cmp	dh, byte ptr lrywnd	;out of window?
	jbe	short wrpmd9		;exit if not
	mov	dh, byte ptr ulywnd	;reset to left edge
wrpmd9:	jmp	short wndext		;then exit

wrpmd1:	call	scnwrt			;write to screen
	cmp	dl, byte ptr lrxwnd	;out of window?
	jbe	short wrpmd2		;skip if not
	mov	dl, byte ptr ulxwnd	;reset to left edge
wrpmd2:	jmp	short wndext		;and exit

;Handle scrolling windows
scrmod:	cmp	al, cr			;carriage return?
	je	short wndret		;then handle it
	cmp	al, lf			;line feed?
	jne	short scrmd1		;skip if not

scrmd0:	inc	dh			;advance line pointer
	cmp	dh, byte ptr lrywnd	;out of window?
	jbe	short wndext		;exit if not
	dec	dh			;move cursor back
	jmp	short scrwnd		;then scroll

scrmd1:	call	scnwrt			;write to screen
	cmp	dl, byte ptr lrxwnd	;out of window?
	jbe	short wndext		;exit if not
	mov	dl, byte ptr ulxwnd	;reset to left edge
	jmp	short scrmd0		;then scroll window

wndret:	mov	dl, byte ptr ulxwnd	;reset to left edge of window
wndext:	cmp	curmod, 0		;moving cursor?
	je	short wndxt1		;skip if yes

;Here to just update cursor without moving it
	mov	fs:curloc[si], dx	;update cursor position
	ret

;Here to update cursor
wndxt1:	mov	ax, 0200h		;set new cursor position
	int	010h			;call BIOS
	ret

;scroll the contents of the window
scrwnd:	call	wndext			;reset cursor
	mov	ax, 0601h		;set to scroll one line
	call	nVidia			;compensate for nVidia card bug
	call	setbkgnd
	mov	ch, byte ptr ulywnd	;set upper-left corner
	mov	cl, byte ptr ulxwnd
	mov	dh, byte ptr lrywnd	;set lower-right corner
	mov	dl, byte ptr lrxwnd
	int	010h			;call BIOS
	ret

;-------------------------------------------------------------------------------
;Write character and attribute to screen.
; If mode 0-3 or 7 handle it here, otherwise let BIOS do it.
; Registers eax, bx, cx, dx, edi, and bp are destroyed.
;
scnwrt:	mov	edi, 0B8000h		;point to CGA screen
	cmp	bl, 3			;is it CGA screen?
	jle	short scnwt2		;exit if so
	cmp	bl, 7			;is it MDA screen?
	je	short scnwt1		;exit if so

;Here to have BIOS handle writing to screen

	mov	ah, 9			;set function
	mov	cx, 1			;write one byte
	cmp	bl, 13h			;256-color mode?
	mov	bl, byte ptr wattrb	; (set the attribute)
	jne	scnwt4			;jump if not mode 13h
	push	bx			;save page in bh
	mov	bh, byte ptr wattrb+1	;256 background colors
	call	nVidia13		;deal with nVidia incompatibility
	pop	bx			;restore page
	jmp	scnwt9
scnwt4:
	test	bl, 0F0h		;is there a background color?
	je	short scnwt0		;jump if not
	push	eax			;save char
	mov	al, bl			;save foreground color in al
	and	al, 0Fh
	shr	bl, 4			;shift background to foreground
	cmp	al, bl			;does foreground = background color
	je	short scnwt3		;jump if so--go xor color
	mov	al, 0DBh		;write a block in background color
	int	10h			;call BIOS

	xor	bl, byte ptr wattrb	;get color for foreground
	and	bl, 0Fh			;(two xors with same val = original val)
scnwt3:	pop	eax			;get character back
	or	bl, 80h			;specify xor write for BIOS
scnwt0:
	int	10h			;call BIOS

scnwt9:	inc	dl			;advance X coordintate
	ret

;Here to do actual screen write
;al=char, dx=cursor position, cl=screen width
scnwt1:	mov	edi, 0B0000h		;point to the MDA screen
scnwt2:	mov	ah, byte ptr wattrb	;load attribute
	mov	ebp, eax		;save char and attribute

	xor	eax, eax
	mov	al, cl			;get screen width
	mul	dh			;width * rows
	add	al, dl			;column + (width * rows)
	adc	ah, 0			;handle carry
	add	eax, eax		;times 2, for 2 bytes per char
	add	ax, fs:scroff		;add in page offset

	add	edi, eax		;add to screen base
	mov	fs:[edi], bp		;write character and attribute to screen

	inc	dl			;advance X coordinate
	ret

;-------------------------------------------------------------------------------
;Set the background fill color in bh. If the video mode is text then the
; attribute provided by the Attrib intrinsic (69) can be used directly.
; But if the video mode is graphic then the high nibble of the attribute
; must be used to set the fill color. (Attemping to use a background
; color other than black (0) for modes 4, 5, 6, and 11h gives vertical
; stripes instead.) Mode 13h has 256 foreground and background colors.

setbkgnd:
	mov	bl, byte ptr fs:modloc	;get video mode
	mov	bh, byte ptr wattrb	;get attribute
	cmp	bl, 3			;skip if it's a text mode
	jbe	sw10
	cmp	bl, 7
	je	sw10
	mov	cl, 4			;get high nibble of attribute 
	shr	bh, cl
	cmp	bl, 13h			;graphic mode 13h has 256 background
	jne	sw10			; colors
	 mov	bh, byte ptr wattrb+1
sw10:	ret

;-------------------------------------------------------------------------------
;Deal with the buggy nVidia card. Make sure VGA registers are in their default
; states (otherwise you're apt to get a screenful of vertical lines).

nVidia:	push	ax
	push	dx

	mov	dx, 3CEh
	mov	ax, 0FF08h		;enable all 8 bits in bit mask register
	out	dx, ax
	mov	ax, 0003h		;make sure xor mode is turned off
	out	dx, ax
	mov	ax, 0001h		;disable set/reset function
	out	dx, ax

	pop	dx
	pop	ax
	ret

;-------------------------------------------------------------------------------
;Routine to display a character on a mode 13h graphic screen. Normally interrupt
; 10h function 09h would be used, but the nVidia card doesn't handle background
; colors other than black.
;Inputs:
; al = character (0..255)
; bl = foreground color (0..255)
; bh = background color (0..255)
; [450h] = BIOS cursor position: (column in low byte and row in high byte)

nVidia13:
	pusha

;Byte:= Peek(FontSeg,  FontOff + Ch*FontHeight);
	movzx	eax, al
	shl	eax, 3		;multiply by 8 for 8 dots of font width
	mov	esi, font8	;8x8 font table
	add	esi, eax

;Point to location on graphic screen that corresponds to char cursor position
;edi:= (row*320 + col)*8
	xor	eax, eax
	mov	ax, fs:[450h]
	push	eax		;save column that is in al
	mov	al, ah		;get row
	mov	ah, 0

	mov	dx, 320
	mul	dx		;dx:ax:= ax*320
	mov	edi, eax
	pop	eax		;get column
	mov	ah, 0
	add	edi, eax
	shl	edi, 3		;*8

	mov	ecx, 8		;for 8 scan lines in font...
@@20:	mov	dl, 8		;for 8 dots of font width...
	mov	dh, fs:[esi]	;get font byte from table

@@30:	shl	dh, 1		;test font bit by moving it into carry flag
	mov	al, bh		;background color
	jnc	@@35
	 mov	al, bl		;foreground color
@@35:	mov	fs:[edi+0A0000h], al
	inc	edi

	dec	dx		;(dh=0 when dl=0)
	jne	@@30		;loop for 8 dots of font width

	inc	esi		;next byte in font table
	add	edi, 320-8	;next scan line down
	loop	@@20		;loop for 8 scan lines

	popa
	ret

;-------------------------------------------------------------------------------
;Display a character for VESA graphic modes. The character in register al is
; displayed at the current cursor position. The cursor position is advanced to
; the right.
; WARNING: This does not scroll or handle more than 8-bit color.
; Also there is no form feed or flashing cursor.
;
;Many display devices do not display characters in VESA graphic modes (even in
; some cases when they claim to do so). To provide this capability, this routine
; is called by device 6, and it displays characters when the video mode is
; greater than $FF. Devices 0 and 1 still call DOS and BIOS routines to display
; characters like they have always done.
;
;An 8x16 font provides an 80x30 character display in mode $101 (640x480).
;
; Register usage:
;  al = character
;  eax= foreground color attribute
;  bl = font byte
;  bh = font height in pixels (16, 8, 14)
;  cl = horizontal pixel counter (8..0)
;  ch = vertical pixel counter (16..0)
;  edx= X coordinate (in pixels)
;  edi= Y coordinate (in pixels)
;  esi= pointer into font table
;  ebp= background color attribute
;  fs = selector pointing to absolute zero (00000000h)
;
vesaout:pusha				;save all registers

	cmp	al, cr			;carriage return?
	jne	vout00			;skip if not
	 mov	startX, 0		;set cursor position to left edge
	 jmp	vout90			;exit
vout00:
	movzx	esi, al			;point to first font byte for character
	cmp	ah, 1			;if device = $106 then
	jne	vout05
	 shl	esi, 3			;font height = 8
	 add	esi, font8		;plus base address of font table
	 mov	bh, 8
	 jmp	vout08
vout05:
	cmp	ah, 2			;if device = $206 then
	jne	vout06
	 imul	esi, 14			;font height = 14 (often unavailable)
	 add	esi, font14		;plus base address of font table
	 mov	bh, 14
	 jmp	vout08
vout06:
	xor	edx, edx		;get height of font (points) from BIOS
	mov	dx, fs:[485h]
	mov	bh, dl			;save height
	imul	esi, edx		;multiply by number of bytes per char

	mov	dx, fs:[10Ch+2]		;get segment address of font table
	shl	edx, 4			;convert to linear address
	xor	edi, edi
	mov	di, fs:[10Ch]		;add in offset
	add	edx, edi
	add	esi, edx		;plus base address of font table
vout08:
	cmp	al, lf			;line feed?
	je	vout80			;jump if so

;set foreground and background colors depending on color depth of graphic mode
	mov	al, byte ptr bytesPerPix
	cmp	al, 0			;4-bit color?
	jne	vout10			;skip if not
	movzx	eax, byte ptr wattrb	;foreground color
	mov	ebp, eax
	and	al, 0Fh
	shr	ebp, 4
	jmp	vout16
vout10:
	cmp	al, 1			;8-bit color?
	jne	vout12			;skip if not
	movzx	eax, byte ptr wattrb	;foreground color
	movzx	ebp, byte ptr wattrb+1	;background color
	jmp	vout16
vout12:
	cmp	al, 2			;16-bit (or 15-bit) color?
	jne	vout14			;skip if not
	movzx	eax, word ptr wattrb	;foreground color
	movzx	ebp, word ptr wattrb+2	;background color
	jmp	vout16
vout14:					;24-bit color
	mov	eax, wattrb		;foreground color
	xor	ebp, ebp		;background color: black
vout16:
	movzx	edx, startX		;get X coordinate
	movzx	edi, startY		;get Y coordinate
	mov	ch, bh			;for the number of pixels high...
vout20:	mov	bl, fs:[esi]		;get font byte
	inc	esi			;point to next font byte

	mov	cl, 8			;for each bit in the font byte...
vout30:	pusha
	push	edx			;pass X coordinate on stack
	push	edi			;pass Y coordinate on stack
	test	bl, 80h			;if bit is set then
	jne	vout35			;(jump if not)
	 mov	eax, ebp		;get background color
vout35:	call	intr41			;Point(X, Y, C) intrinsic
	popa
	inc	edx			;move one pixel to the right
	shl	bl, 1			;shift font byte <<
	dec	cl			;loop for 8 pixels wide
	jne	vout30

	sub	edx, 8
	inc	edi			;next scan line down
	dec	ch			;loop for 16 pixels high
	jne	vout20

	inc	startX			;set position to start of next char
	xor	eax, eax
	mov	al, bh			;font height
	dec	eax
	sub	startY, ax

	mov	ax, startX		;check for line wrap
	add	ax, 8			;if startX+8 >= vXSize then crlf
	cmp	ax, word ptr vXSize
	jb	vout90
	 mov	startX, 0		;carriage return
vout80:
	xor	eax, eax		;line feed
	mov	al, bh			;font height
	add	startY, ax
vout90:
	popa				;restore all registers
	clc				;indicate no errors
	ret

;===============================================================================
;Set text cursor position. X position is in bx; Y position is in ax. This also
; affects the Move intrinsic's position, so that VESA text can be positioned
; anywhere.
;
mcurse:	push	eax			;save registers
	push	ebx
	push	edx

	mov	dl, bl			;set position for BIOS: X
	mov	dh, al			; Y

	shl	ebx,3			;set graphic position: X*8
	mov	startX, bx

	imul	ax, fs:[485h]		;Y*POINTS (font height)
	mov	startY, ax

	mov	ah, 02h			;set function
	mov	bh, fs:scrpag		;get currently displayed screen page
	int	10h			;call BIOS

	pop	edx			;restore registers
	pop	ebx
	pop	eax
	ret

;===============================================================================
;				I/O ROUTINES
;===============================================================================
;Intrinsic to set the handle for the current input or output file.
; FSET(HANDLE, ^I);
; This also determines the size of the buffer:
;
;		Input	Output
;	Large:	  I	  O
;	Small:	  i	  o
;
intr24:	mov	ah, al			;save copy of handle type
	or	al, 20h			;convert handle type to lowercase
	cmp	al, 'i'			;input handle?
	jne	short fset10		;jump if not
	mov	edx, [esp]+4		;set input handle
	mov	inhand, dx
	mov	inbflg, ah		;set buffer type
	ret	4			;drop args
fset10:
	cmp	al, 'o'			;output handle?
	jne	short fset20		;jump if not
	mov	edx, [esp]+4		;set output handle
	mov	othand, dx		;set output handle
	mov	otbflg, ah		;set buffer type
fset20:	ret	4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to call another program as a subroutine.
; CHAIN("C:\TEST\XDEMO.COM");
;
dseg	segment	para public use32 'data'
	align	4
;Parameter block:
exeprb	dw	0			;segment of environment block
	dw	0			;address of command tail
	dw	0			;segment of command tail
	dw	0			;address of fcb 1
	dw	0			;segment of fcb 1
	dw	0			;address of fcb 2
	dw	0			;segment of fcb 2

stkseg	dw	0			;stack segment
stkreg	dw	0			;stack pointer
namlen	equ	80
wrknam	db	namlen+1 dup(?)		;working file name
dseg	ends

intr28:	pusha				;save registers
	mov	edx, eax		;get file name pointer
	and	cflag, 0		;clear carry flag
	call	parnam			;parse file string

;Set up parameter block for program
	and	exeprb, 0		;set environment block
	mov	exeprb+2, 80h		;set com tail address
	mov	ax, pspseg		;set com tail segment
	mov	exeprb+4, ax
	mov	exeprb+8, ax		;set fcb1 segment
	mov	exeprb+12, ax		;set fcb2 segment
	mov	exeprb+6, 5ch		;set fcb1 address
	mov	exeprb+10, 6ch		;set fcb2 address

;Set up and call subprogram
	mov	stkseg, ss		;save stack segment
	mov	stkreg, sp		; and stack pointer

	mov	ax, 4B00h		;function to execute a program
	mov	dosfun, ah		;save in case of error
	mov	edi, offset regs_edi    ;point to regs
	mov	[regs_ax-regs_edi+edi], ax
	mov	[regs_ebx-regs_edi+edi], offset exeprb ;point to parameter block
	mov	[regs_es-regs_edi+edi], dseg
	mov	[regs_edx-regs_edi+edi], offset wrknam ;point to file name
	mov	[regs_ds-regs_edi+edi], dseg
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;call interrupt 21h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode
	push	[regs_flags-regs_edi+edi] ;get flags returned by real-mode int
	popf
	sti
	jnc	short exext2		;skip if no errors
	 mov	dosret, ax		;save error code
	 or	cflag, -1		;set carry flag
exext2:
	mov	ss, stkseg		;restore stack
	mov	sp, stkreg
	popa				;restore registers
	ret

;-------------------------------------------------------------------------------
;Copy (parse) the file name pointed to by edx into "wrknam". Any leading spaces
; are removed. The file name can be terminated with a comma, carriage return,
; binary zero, or with the MSB set on the last character.
;
parnam:	push	eax
	push	ecx
	push	esi
	push	edi			;save registers

	mov	esi, edx		;point esi to file name (source)
	lea	edi, wrknam		;point destination to wrknam
	mov	ecx, namlen-1		;maximum number of characters to copy -1

;Eat any leading spaces
eatspa:	mov	al, [esi]		;get a char
	inc	esi
	cmp	al, 20h			;is it a space?
	jne	short notspa		;exit loop if not
	dec	ecx
	jne	short eatspa		;loop for max characters
	jmp	short parst3		;here if nothing but spaces

parst1:	mov	al, [esi]		;get next char
	inc	esi
notspa:	test	al, al			;is it a zero?
	je	short parst3		;jump if so
	cmp	al, cr			;is it a carriage return?
	je	short parst3		;jump if so
	cmp	al, ','			;is it a comma?
	je	short parst3		;jump if so
	test	al, al			;is the most significant bit set?
	js	short parst2		;jump if so
	mov	[edi], al		;store byte into wrknam
	inc	edi
	dec	ecx			;loop until terminator is found
	jne	short parst1		; or max length-1 is reached

parst2:	and	al, 7Fh			;strip most significant bit off char
	mov	[edi], al		;store last char
	inc	edi
parst3:	mov	byte ptr [edi], 0	;terminate wrknam with a zero

	pop	edi
	pop	esi
	pop	ecx
	pop	eax			;retore registers
	ret

;-------------------------------------------------------------------------------
;Intrinsic to open a file by name.
; HANDLE:= FOPEN("C:\TEST\FILENAME.EXT", MODE);
;
intr29:	mov	edx, [esp]+4		;get address of file name
	push	ecx			;save register
	call	parnam			;copy file name into wrknam
	test	eax, eax		;is this read mode (=0)?
	jne	short opodk1		;jump if not
	mov	ax, 3d00h		;open file for reading
	jmp	short opodk3		;enter common code
opodk1:
	cmp	al, 1			;is this write mode (=1)?
	jne	short opodk9		;jump if not--exit

	mov	ah, 3ch			;create (or open) a file
	xor	ecx, ecx		;set normal mode
opodk3:	lea	dx, wrknam		;point to file name
	call	doscal			;call DOS routine
	jnc	short opodk9		;skip no error
	call	dosioerr		;handle I/O error
	mov	ax, 0FFFFh		;create an illegal handle
opodk9:
	movzx	eax, ax
	pop	ecx			;restore register
	ret	4			;drop arg

;-------------------------------------------------------------------------------
;Handle disk I/O errors.
; ax is destroyed.
;
dosioerr:
	mov	dosret, ax		;record DOS error code for getreg
	mov	al, 3			;flag I/O error
	jmp	error			;(pjmp) handle error

;-------------------------------------------------------------------------------
;Intrinsic to write to a specified sector on a disk.
; WRITE(DRIVE, SECTOR, BUFFER, SIZE);
; WARNING: This uses (overwrites) device 3's large output buffer
;
intr30:	push	ebx			;save registers
	push	ecx			;(these affect offsets to arguments)
	push	esi
	push	edi

	mov	ebx, offset dotbuf	;set up address of BUFFER
	mov	bufadr, bx
	mov	ebx, [esp]+(2+4)*4	;set starting (logical) SECTOR
	mov	logsec, ebx

	mov	ecx, eax		;set loop counter with number of sectors
	mov	esi, [esp]+(1+4)*4	;esi:= start of BUFFER
fwrit0:	push	ecx
	mov	ecx, 512/4		;copy BUFFER to dotbuf (where DOS has
	mov	edi, offset dotbuf	; access)
	cld
	rep movsd			;es:[edi++] := ds:[esi++]; ecx --

	mov	edi, offset regs_edi	;point to regs
	mov	al, [esp]+(3+5)*4	;DRIVE
	mov	[regs_al-regs_edi+edi], al
	mov	[regs_bx-regs_edi+edi], offset logsec
	mov	[regs_cx-regs_edi+edi], -1
	mov	[regs_ds-regs_edi+edi], dseg
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 26h			;set up call to do absolute disk write
	mov	ax, 300h		;do function 0300h
	int	31h			;do int 31h to do the call to real mode
	jnc	fwrit1			;skip no error
	 call	dosioerr		;handle error
fwrit1:
	inc	logsec			;next sector
	pop	ecx
	loop	fwrit0

	pop	edi
	pop	esi
	pop	ecx
	pop	ebx
	ret	3*4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to read from a specified sector on a disk.
; READ(DRIVE, SECTOR, BUFFER, SIZE);
; WARNING: This uses (overwrites) device 3's large input buffer
;
intr31:	push	ebx			;save registers
	push	ecx			;(these affect offsets to arguments)
	push	esi
	push	edi

	mov	ebx, offset dinbuf	;set up address of BUFFER
	mov	bufadr, bx
	mov	ebx, [esp]+(2+4)*4	;set starting (logical) SECTOR
	mov	logsec, ebx

	mov	ecx, eax		;set loop counter with number of sectors
	mov	edi, [esp]+(1+4)*4	;edi:= start of BUFFER
fread0:	push	ecx
	push	edi
	mov	edi, offset regs_edi	;point to regs
	mov	al, [esp]+(3+6)*4	;DRIVE
	mov	[regs_al-regs_edi+edi], al
	mov	[regs_bx-regs_edi+edi], offset logsec
	mov	[regs_cx-regs_edi+edi], -1
	mov	[regs_ds-regs_edi+edi], dseg
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 25h			;set up call to do absolute disk read
	mov	ax, 300h		;do function 0300h
	int	31h			;do int 31h to do the call to real mode
	jnc	fread1			;skip no error
	 call	dosioerr		;handle error
fread1:
	mov	ecx, 512/4		;copy dinbuf to BUFFER (DOS cannot
	mov	esi, offset dinbuf	; BUFFER, which is in upper memory)
	pop	edi
	cld
	rep movsd			;es:[edi++] := ds:[esi++]; ecx --

	inc	logsec			;next sector
	pop	ecx
	loop	fread0

	pop	edi
	pop	esi
	pop	ecx
	pop	ebx
	ret	3*4			;drop args

dseg	segment	para public use32 'data'
;Information packet used by interrupt 24h and 25h
logsec	dd	?		;32-bit logical sector number
	dw	1		;number of sectors to read or write
bufadr	dw	?		;address of BUFFER (segment:offset)
	dw	dseg
dseg	ends

;-------------------------------------------------------------------------------
;Intrinsic to close a disk file by its handle.
; FCLOSE(HANDLE);
;
intr32:	push	ebx			;save registers

	mov	ebx, eax		;get handle
	mov	ah, 3Eh			;close file
	call	doscal			;call DOS routine
	jnc	short clfdk1		;skip no error
	call	dosioerr		;handle I/O error
clfdk1:
	pop	ebx			;restore registers
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return 'true' if a key has been struck.
; FLAG:= CHKKEY;
;
intr33:	mov	ah, 01h			;keyboard status function
	int	16h			;BIOS keyboard system call
	je	short chkky1		;skip if no key is waiting
	or	eax, -1			;return 'true'
	ret
chkky1:
	xor	eax, eax		;return 'false'
	ret

;-------------------------------------------------------------------------------
;Intrinsic to call a DOS or BIOS "interrupt" routine. Arguments can be
; passed in the registers (both ways) using GETREG, intrinsic 35.
; SOFTINT($21);
;
intr34:
	cmp	al, 31h
	je	sint0

	call	lodreg			;copy: regs_ax <- axreg, etc.

	push	ebx
	push	ecx
	push	edi			;save registers
	mov	edi, offset regs_edi	;point to regs (es base = dseg)
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	ebx, eax		;set the interrupt number passed in eax
	mov	ax, 300h		;do function 0300h
	int	31h			;do int 31h to do the call to real mode
	pop	edi
	pop	ecx
	pop	ebx			;restore registers
	push	regs_flags		;get flags returned by real-mode int
	popf
	sti

	call	unlreg			;copy: axreg <- regs_ax, etc.
					; without changing the carry flag
	jnc	short sint1		;test carry flag returned by int
	or	cflag, -1		;set cflag accordingly
	jmp	short sint2
sint1:
	and	cflag, 0
sint2:	ret


;if interrupt 31h then 
sint0:
	pusha
	mov	eax, dword ptr axreg		;set the registers
	mov	ebx, dword ptr bxreg
	mov	ecx, dword ptr cxreg
	mov	edx, dword ptr dxreg
	mov	esi, dword ptr sireg
	mov	edi, dword ptr direg
	mov	ebp, dword ptr bpreg
					;sp
					;cs (here)
					;ss
					;flags
;	mov	es, esreg
;	mov	ds, dsreg

	int	31h

	mov	dword ptr axreg, eax
	mov	dword ptr bxreg, ebx
	mov	dword ptr cxreg, ecx
	mov	dword ptr dxreg, edx
	mov	dword ptr sireg, esi
	mov	dword ptr direg, edi
	mov	dword ptr bpreg, ebp
					;sp
					;cs (here)
					;ds
					;ss
;	mov	esreg, es
;	mov	dsreg, ds

	pushfd				;save flag register (mostly carry)
	pop	eax
	mov	flagreg, ax		;we only care about the low 16 flags

	jnc	short sint3		;test carry flag returned by int
	or	cflag, -1		;set cflag accordingly
	jmp	short sint4
sint3:
	and	cflag, 0
sint4:
	popa
	ret

;-------------------------------------------------------------------------------
;Intrinsic to return the address of MS-DOS environment information, which
; consists of registers that are set and saved by the software interrupt
; intrinsic (34). Also included are the current code segment and DOS errors.
; CPUREG:= GETREG;
;
intr35:	mov	dsgmnt, dseg		;point to data segment
	mov	eax, offset cpureg	;return the address
	ret

;-------------------------------------------------------------------------------
;Intrinsic to read a byte from segmented memory.
; BYTE:= PEEK(SEGMENT, OFFSET);
;
intr37:	mov	edx, [esp]+4		;get segment
	shl	edx, 4			;convert to a linear address
	add	edx, eax		; add offset
	xor	eax, eax		;zero high bytes
	mov	al, fs:[edx]		;read the location
	ret	4

;-------------------------------------------------------------------------------
;Intrinsic to write a byte to segmented memory.
; POKE(SEGMENT, OFFSET, BYTE);
;
intr38:	mov	edx, [esp]+8		;get segment
	shl	edx, 4			;convert to linear address
	add	edx, [esp]+4		; add offset
	mov	fs:[edx], al		;write byte to the location
	ret	2*4			;drop args

;-------------------------------------------------------------------------------
;Intrinsic to do a return from interrupt. This allows an XPL program to
; be used as an interrupt handler.
; INTRET;
;
intr66:
	mov	al, 5			;flag bad intrinsic
	jmp	error
;	mov	esp, entstk
;	call	lodreg			;restore registers
;	iret

;-------------------------------------------------------------------------------
;Intrinsic to do a far jump to an external routine.
; EXTJMP(SEGMENT, OFFSET);
; This uses self-modifying code.
;
intr67:
	mov	al, 5			;flag bad intrinsic
	jmp	error
;	mov	cs:addvc1, ax		;set offset of routine
;	add	sp, 4			;skip return address
;	pop	cs:segvc1		;set segment of routine
;	mov	esp, entstk
;	call	lodreg			;load all registers
;					;(also purges instruction pipeline)
;	db	0EAh			;far jump opcode
;addvc1	dw	0			;offset part of address
;segvc1	dw	0			;segment part of address

;-------------------------------------------------------------------------------
;Intrinsic to do a far call to an external routine.
; EXTCAL(SEGMENT, OFFSET);
; This uses self-modifying code.
;
intr68:
	mov	al, 5			;flag bad intrinsic
	jmp	error
;	mov	cs:addvc2, ax		;set offset of routine
;	mov	ax, [esp]+4		;get segment
;	mov	cs:segvc2, ax		;set segment of routine
;
;	push	ss			;save some registers
;	push	es
;	push	ds
;	push	di
;	call	lodreg			;load all registers
;					;(also purges instruction pipeline)
;	db	9ah			;far call opcode
;addvc2	dw	0			;offset part of address
;segvc2	dw	0			;segment part of address
;
;	call	unlreg			;unload the registers
;	pop	di			;restore registers
;	pop	ds
;	pop	es
;	pop	ss
;	ret	4			;drop args

;-------------------------------------------------------------------------------
;Load all 16-bit general purpose registers plus data segment and extra
; segment.
;
lodreg:	push	eax

	mov	ax, axreg		;set the registers
	mov	regs_ax, ax
	mov	ax, bxreg
	mov	regs_bx, ax
	mov	ax, cxreg
	mov	regs_cx, ax
	mov	ax, dxreg
	mov	regs_dx, ax
	mov	ax, sireg
	mov	regs_si, ax
	mov	ax, direg
	mov	regs_di, ax
	mov	ax, bpreg
	mov	regs_bp, ax
					;sp
					;cs (here)
					;ss
					;flags
	mov	ax, esreg
	mov	regs_es, ax
	mov	ax, dsreg
	mov	regs_ds, ax

	pop	eax
	ret

;-------------------------------------------------------------------------------
;Unload registers into register array without altering carry flag.
;
unlreg:	push	eax

	mov	ax, regs_ax
	mov	axreg, ax
	mov	ax, regs_bx
	mov	bxreg, ax
	mov	ax, regs_cx
	mov	cxreg, ax
	mov	ax, regs_dx
	mov	dxreg, ax
	mov	ax, regs_si
	mov	sireg, ax
	mov	ax, regs_di
	mov	direg, ax
	mov	ax, regs_bp
	mov	bpreg, ax
					;sp
					;cs (here)
					;ds
					;ss
	mov	ax, regs_es
	mov	esreg, ax
	mov	ax, regs_ds
	mov	dsreg, ax

	pushfd				;save flag register (mostly carry)
	pop	eax
	mov	flagreg, ax		;we only care about the low 16 flags

	pop	eax
	ret

;===============================================================================

dseg	segment	para public use32 'data'
	align	4
;Table of routines to handle basic I/O functions
;dev number:	  0	  1	  2	  3	  4	  5	  6	  7	  8
openit	dd	resbuf,	reskey,	dummio,	dopeni,	dummio,	dummio,	dummio,	dummio,	psopni
openot	dd	dummio,	dummio,	dummio,	dopeno,	dummio,	dummio,	reswnd,	dummio,	psopno
chint	dd	bufkey,	keyin,	dummio,	dskin,	serin,	dummio,	wndchn,	dummio,	psin
choutt	dd	conout,	tvout,	prnout,	dskout,	serout,	bioprn,	wndout,	dummio,	psout
closet	dd	dummio,	dummio,	dummio,	dcloso,	dummio,	dummio,	dummio,	dummio,	dummio

lastCh	db	0			;last character read in by chin routine
backFl	db	0			;flag: backup to re-read last character
dseg	ends

;-------------------------------------------------------------------------------
;Initialize input device number (nowdev).
;
openi:	push	eax
	push	ebx			;save registers
	mov	bx, nowdev		;get device number
	mov	ah, bh			;get extended device number in ah
	cmp	bl, 8			;legal device number?
	ja	short ioerr		;(pjmp) jump if not
	movzx	ebx, bl			;set up 32-bit index
	shl	ebx, 2			;times 4 to index dwords
	call	openit[ebx]		;call routine corresponding to device
	jc	short ioerr		;(pjmp) jump if error
	pop	ebx
	pop	eax			;restore registers
	ret

;-------------------------------------------------------------------------------
;Initialize output device number (nowdev).
;
openo:	push	eax
	push	ebx			;save registers
	mov	bx, nowdev		;get device number
	mov	ah, bh			;get extended device number in ah
	cmp	bl, 8			;legal device number?
	ja	short ioerr		;(pjmp) jump if not
	movzx	ebx, bl			;set up 32-bit index
	shl	ebx, 2			;times 4 to index dwords
	call	openot[ebx]		;call routine corresponding to device
	jc	short ioerr		;(pjmp) jump if error
	pop	ebx
	pop	eax			;restore registers
	ret

;-------------------------------------------------------------------------------
;Input byte from device number (nowdev), and return it in al.
; ah is destroyed.
;
chin:	cmp	backFl, 0		;re-read the previous character?
	jne	short chin100		;jump if so

	push	eax			;save registers
	push	ebx			; (eax is saved in case of ioerr)
	mov	bx, nowdev		;get device number
	mov	ah, bh			;get extended device number in ah
	cmp	bl, 8			;legal device number?
	ja	short ioerr		;(pjmp) jump if not
	movzx	ebx, bl			;set up 32-bit index
	shl	ebx, 2			;times 4 to index dwords
	call	chint[ebx]		;call routine corresponding to device
	mov	lastCh, al		;save character for possible backup
	jc	short ioerrx		;jump if error
chin90:	pop	ebx			;restore bx register
	add	esp, 4			;discard original eax value
	ret
chin100:
	mov	backFl, 0		;clear backup flag
	movzx	eax, lastCh		;return the previous character
	ret

;-------------------------------------------------------------------------------
;Report I/O error
; BEWARE OF STACK. This is only called from openi, openo, chin, chout, 
; and close. eax and ebx are already pushed on the stack.
;
ioerr:	mov	al, 3			;report RUN-TIME ERROR 3: I/O
	call	error
	pop	ebx
	pop	eax			;restore registers
	ret

ioerrx:	push	eax			;save eax (might be eof)
	mov	al, 3			;report RUN-TIME ERROR 3: I/O
	call	error
	pop	eax			;restore eax
	jmp	short chin90		;exit

;-------------------------------------------------------------------------------
;Output byte in al to device number (nowdev).
;
chout:	push	eax
	push	ebx			;save registers
	mov	bx, nowdev		;get device number
	mov	ah, bh			;get extended device number in ah
	cmp	bl, 8			;legal device number?
	ja	short ioerr		;(pjmp) jump if not
	movzx	ebx, bl			;set up 32-bit index
	shl	ebx, 2			;times 4 to index dwords
	call	choutt[ebx]		;call routine corresponding to device
	jc	short ioerr		;(pjmp) jump if error
	pop	ebx
	pop	eax			;restore registers
	ret

;-------------------------------------------------------------------------------
;Close output device number (nowdev).
;
close:	push	eax
	push	ebx			;save registers
	mov	bx, nowdev		;get device number
	mov	ah, bh			;get extended device number in ah
	cmp	bl, 8			;legal device number?
	ja	short ioerr		;(pjmp) jump if not
	movzx	ebx, bl			;set up 32-bit index
	shl	ebx, 2			;times 4 to index dwords
	call	closet[ebx]		;call routine corresponding to device
	jc	short ioerr		;(pjmp) jump if error
	pop	ebx
	pop	eax			;restore registers
	ret

;===============================================================================
;		     LOW-LEVEL CONSOLE I/O ROUTINES
;===============================================================================

dseg	segment	para public use32 'data'
bufsiz	equ	128			;size of keyboard buffer in bytes
;DOS expects these three variables to be in this order:
inpsiz	db	bufsiz			;0 number of characters
outsiz	db	0			;1 number of chars read by DOS (-cr)
keybuf	db	bufsiz dup(0)		;2+ buffer read from keyboard incl cr

keyinx	dw	bufsiz			;index into keyboard buffer
dseg	ends

;-------------------------------------------------------------------------------
;Flush (reset) the keyboard buffer (device 0).
;
resbuf:	push	eax			;save register
	mov	keyinx, bufsiz		;flush the keyboard buffer
	mov	ax, 0C00h		;zero DOS input buffer
	int	21h
	pop	eax			;restore register
dummio:	clc				;indicate no errors
	ret

;-------------------------------------------------------------------------------
;Get character from buffered keyboard and return it in al (device 0).
; ah is destroyed.
;
bufkey:	push	ebx
	push	edx			;save registers

	mov	bx, keyinx		;get pointer
	cmp	bx, bufsiz		;test empty
	jne	short getbuf		;skip not empty

;get a new buffer (line) from DOS
	mov	ah, 0Ah			;buffered key input
	lea	dx, inpsiz		;point to start of arguments

	push	ebx
	push	ecx
	push	edi
	mov	edi, offset regs_edi
	mov	[regs_ah-regs_edi+edi], 0Ah ;function 0Ah, buffered keyboard
	mov	[regs_ds-regs_edi+edi], dseg
	mov	[regs_dx-regs_edi+edi], dx
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;call interrupt 21h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode
	pop	edi
	pop	ecx
	pop	ebx

	mov	al, lf			;echo a line feed after the carriage ret
	call	tvout
	xor	ebx, ebx		;set index to zero
getbuf:
	mov	al, keybuf[ebx]		;get a byte from buffer
	inc	ebx			;point to next character
	cmp	al, cr			;carriage return?
	jne	short bufky1		;jump if not
	mov	bx, bufsiz		;then buffer is empty
bufky1:
	mov	keyinx, bx		;save index into buffer

	pop	edx
	pop	ebx			;restore registers
	clc				;no errors
	ret

;-------------------------------------------------------------------------------
;Output character in al to the console (device 0).
;
conout:	push	eax
	push	edx			;save registers
	cmp	al, 0Ch			;form feed?
	jne	short cono10		;jump if not
	call	clear			;else clear screen
	mov	al, cr			;follow with a carriage return
cono10:
	mov	dl, al			;get character
	mov	ah, 02h			;set function for character output
	int	21h			;DOS call

	pop	edx
	pop	eax			;restore registers
	clc				;indicate no errors
	ret

;-------------------------------------------------------------------------------
;Clear the screen using the BIOS scrolling routine. This works for both
; graphics and text.
;
;Addresses of BIOS variables in low memory:
vmode	equ	0449h			;current video mode
swidth	equ	044ah			;screen width in characters
scrows	equ	0484h			;number of character rows -1 on screen
vpage	equ	0462h			;current video page

clear:	push	eax
	push	ebx
	push	ecx
	push	edx			;save registers

;set scrolling window
	xor	ecx, ecx		;set upper-left corner of window
	mov	dh, fs:scrows		;get last row
	cmp	dh, 36			;clears bottom 8 lines for mode 6ah
	jne	short clear5		; when char height = 16. 16*(36+1) = 592
	inc	dh			; (16*38*800/8 = 60800 which is < F000h
	mov	fs:scrows, dh		;Compaq Presario 1240 requires this
clear5:
	cmp	dh, 41			;clears bottom 12 lines for mode 6ah
	jne	short clear4		; when char height = 14. 14*(41+1) = 588
	inc	dh			; (14*43*800/8 = 60200 which is < F000h
	mov	fs:scrows, dh		;Compaq Presario 1240 requires this
clear4:
	cmp	dh, 127			;out of range? (1024pix / 8 pix/char)
	jbe	short clear3		;skip if not
	mov	dh, 127			;set largest screen (1024/8 = 128)
clear3:
	test	dh, dh			;zero?
	jne	short clear2		;skip if advanced BIOS
	mov	dh, 24			;must be CGA or MDA, use 25-1
clear2:
	mov	dl, fs:swidth		;get width
	dec	dl			;minus one

;select proper attribute for graphics versus text mode
	mov	bh, 07h			;set normal attribute
	mov	bl, fs:vmode		;get video mode
	cmp	bl, 3			;mode 3 or less?
	jle	short clear1		;jump if so
	cmp	bl, 07h			;mode seven?
	je 	short clear1		;jump if so
	xor	bh, bh			;use zero attribute for graphics modes
clear1:
	mov	ax, 0600h		;call scroll function to blank window
	call	nVidia			;compensate for nVidia card bug
	int	10h			;call BIOS

	dec	dh			;restore scrows if it was changed above
	cmp	dh, 36			;this is for the Compaq Presario
	jne	short clear0		; model 1240 - 800x600 NeoMagic display
	mov	fs:scrows, dh
clear0:	cmp	dh, 41
	jne	short clear9
	mov	fs:scrows, dh
clear9:
	mov	bh, fs:vpage		;get current page
	xor	edx, edx		;set cursor to home (row=0, col=0)
	mov	ah, 02h			;set cursor function
	int	10h			;call BIOS

	pop	edx
	pop	ecx
	pop	ebx
	pop	eax			;restore registers
	clc				;indicate no errors
	ret

;-------------------------------------------------------------------------------
;Clear keystrokes for device 1 input
;
reskey:	mov	ah, 01			;get keyboard status
	int	16h
	jz	short resk90		;jump if no key is waiting
	mov	ah, 0			;read character from keyboard
	int	16h
	jmp	short reskey		;loop until no key is waiting
resk90:
	clc
	ret

;-------------------------------------------------------------------------------
;Input a char from the keyboard and return it in al. Does not echo character
; (device 1). ah is destroyed.
;
keyin:	mov	ah, 08h			;character input without echo
	int	21h			;call DOS
	clc				;indicate no errors
	ret

;-------------------------------------------------------------------------------
;Output character in al to the console, but with no ctrl-c, no tab handling
; etc. (device 1).
;
tvout:	push	eax
	push	ebx			;save registers

	mov	bx, 0007h		;set foreground color for graphics modes
					; and display page 0
	mov	ah, 0Eh			;write character in teletype mode
	int	10h			;call BIOS

	pop	ebx
	pop	eax			;restore registers
	clc				;indicate no errors
	ret

;===============================================================================
;Print character in al (device 2).
;
prnout:	push	eax
	push	edx			;save registers

	mov	dl, al			;get output char
	mov	ah, 05h			;function = printer output
	int	21h

	pop	edx
	pop	eax			;restore registers
	clc				;indicate no errors
	ret

;-------------------------------------------------------------------------------
;Print character in al (device 5).
; Output to the printer using BIOS instead of DOS (it's five times faster)
;
bioprn:	push	eax
	push	edx			;save registers

	mov	dl, ah			;get extended device number
	mov	dh, 0
	mov	ah, 0			;select BIOS printer output function
	int	17h

	pop	edx
	pop	eax			;restore registers
	clc				;indicate no errors
	ret

;===============================================================================
;Routines to handle character-stream disk I/O (device 3).
;
dseg	segment	para public use32 'data'
;File parameters
	align	4
inhand	dw	0			;input file handle
inbflg	db	'i'			;input buffer flag ('I' = big buffer)
dineof	db	0			;eof seen on buffered input
dinbyt	db	0			;one-byte (small) buffer
	extrn	dinsiz:abs
	align	4
dinend	dw	dinsiz			;point to end +1
dinptr	dw	dinsiz			;working pointer

	align	4
othand	dw	0			;output file handle
otbflg	db	'i'			;output buffer flag
dotbyt	db	0			;one-byte (small) buffer

	extrn	dotsiz:abs
	align	4
dotend	dw	dotsiz			;point to end +1
dotptr	dw	0			;working pointer
dseg	ends

;-------------------------------------------------------------------------------
;Open the input file set up by FSET (intrinsic 24).
;
dopeni:	push	eax
	push	ebx
	push	ecx
	push	edx			;save registers

	cmp	inbflg, 'I'		;(big) buffered input?
	jne	short dopni1		;skip if not
	call	resdib			;zero input buffer
dopni1:
	mov	bx, inhand		;get handle
	mov	ax, 4200h		;set file pointer, use absolute method
	xor	ecx, ecx		;point to start of file
	xor	edx, edx		; (32-bit offset)
	int	21h

	pop	edx
	pop	ecx
	pop	ebx
	pop	eax			;restore registers
	clc				;ignore any errors
	ret

;-------------------------------------------------------------------------------
;Open the output file set up by FSET, intrinsic 24.
;
dopeno:	push	eax
	push	ebx
	push	ecx
	push	edx			;save registers

	mov	bx, othand		;get handle
	mov	ax, 4200h		;move file pointer, use absolute method
	xor	ecx, ecx		;point to start of file (rewind)
	xor	edx, edx		; (32-bit offset)
	int	21h

	cmp	otbflg, 'O'		;(big) buffered output?
	jne	short dopen2		;skip if not
	call	resdob			;zero output buffer
dopen2:
	pop	edx
	pop	ecx
	pop	ebx
	pop	eax			;restore registers
	clc				;ignore any errors
	ret

;-------------------------------------------------------------------------------
;Read a byte from the disk input file, and return it in al.
; Buffered case is optimized for speed.
;
	align	4			;(for a little speed)
dskin:	cmp	inbflg, 'I'		;is the large buffer being used?
	jne	dskin0			;jump if not
	push	ebx			;save register
	xor	ebx, ebx
	mov	bx, dinptr		;get buffer pointer
	cmp	dinend, bx		;is the buffer empty?
	jbe	short dinnew		;jump if so, else clear carry
	mov	al, dinbuf[ebx]		;get a byte from the buffer
	inc	ebx			;bump pointer
	mov	dinptr, bx
	pop	ebx			;restore register
	ret				;return with no errors (carry clear)

;Get a new buffer. ah is destroyed.
dinnew:	push	ecx
	push	edx			;save rest of registers
	mov	ah, 3Fh			;read DOS file
	mov	bx, inhand		;point to handle
	mov	cx, dinsiz		;set read size
	lea	edx, dinbuf		;point to start of buffer
	call	dosCalD			;DOS call
	jnc	short dinnw1		;jump if no error
	mov	dosret, ax		;record DOS error code for getreg
	stc				;indicate error
	jmp	short dinnw9		;exit
dinnw1:
	test	ax, ax			;test number of bytes actually read
	jne	short dinnw4		;skip if not end of file
	mov	al, eof			;return eof
	cmp	dineof, al		;already seen eof?
	jne	short dinnw2		;jump if not
	stc				;else flag error
	jmp	short dinnw9		;exit
dinnw2:
	mov	dineof, al		;save end of file
	jmp	short dinnw8		;return with no errors
dinnw4:
	mov	dinend, ax		;set end to number of bytes read
	xor	ebx, ebx		;reset buffer pointer
	mov	dineof, bl		;flag no eof
	mov	dinptr, bx
	mov	al, dinbuf		;get first byte in buffer
	inc	dinptr			;bump pointer

dinnw8:	clc				;no errors
dinnw9:	pop	edx
	pop	ecx
	pop	ebx			;restore registers
	ret

;Get byte from small (non) buffer. ah is destroyed.
dskin0:	push	ebx
	push	ecx
	push	edx			;save registers
	mov	ah, 3Fh			;read a file
	mov	bx, inhand		;get its handle
	mov	cx, 1			;read one byte
	mov	edx, offset dinbyt	;point to buffer
	call	doscal			;do the read
	jnc	short dskin1		;jump if no error
	mov	dosret, ax		;record DOS error code for getreg
	stc				;indicate error
	jmp	short dskin9		;exit
dskin1:
	test	ax, ax			;test number of bytes read
	jne	short dskin4		;skip if not end of file
	cmp	dinbyt, eof		;eof char seen yet?
	jne	short dskin2		;skip if not
	mov	al, dinbyt		;return something
	stc				;flag error
	jmp	short dskin9		;exit

dskin2:	mov	dinbyt, eof		;return eof char
dskin4:	mov	al, dinbyt		;return the byte in al
	clc				;no errors
dskin9:	pop	edx
	pop	ecx
	pop	ebx			;restore registers
	ret

;-------------------------------------------------------------------------------
;Write the byte in al to the output file.
; Buffered case is optimized for speed.
;
	align	4			;(for a little speed)
dskout:	cmp	otbflg, 'O'		;is the larger buffer being used?
	jne	short dskot0		;jump if not
	push	ebx			;save register
	xor	ebx, ebx
	mov	bx, dotptr		;get buffer pointer
	mov	dotbuf[ebx], al		;store byte into buffer
	inc	ebx			;bump pointer
	cmp	dotend, bx		;is the buffer full?
	jbe	short flush		;jump if so, else clear carry
	mov	dotptr, bx		;save pointer
	pop	ebx			;restore register
	ret				;return with no errors (carry clear)

dskot0:	push	eax
	push	ebx
	push	ecx
	push	edx			;save registers
	mov	ah, 40h			;write to file
	mov	bx, othand		;get handle
	mov	cx, 1			;number of bytes to write
	mov	edx, offset dotbyt	;point to one-byte buffer
	mov	dotbyt, al		;put byte into buffer
	call	doscal			;do the write
	jc	short dskot8		;jump if error
	test	ax, ax			;number of bytes actually written
	jne	short dskot9		;jump if disk is full
					;no errors: carry is cleared by test
dskot8:	mov	dosret, ax		;record DOS error code for getreg
	stc				;indicate error
dskot9:	pop	edx
	pop	ecx
	pop	ebx
	pop	eax			;restore registers
	ret

;Write the big buffer to disk. ebx is already pushed on the stack.
flushb:	push	ebx			;save ebx
flush:	push	eax
	push	ecx
	push	edx			;save the rest of the registers

	mov	ah, 40h			;write to file
	mov	ecx, ebx		;number of bytes to write
	mov	bx, othand		;get handle
	lea	edx, dotbuf		;point to start of buffer
	call	dosCalD			;call DOS
	jc	short flush8		;jump if error
	test	ax, ax			;number of bytes actually written
	je	short flush8		;jump if disk is full
	and	dotptr, 0		;reset pointer
	clc				;flag no error
	jmp	short flush9		;exit

flush8:	mov	dosret, ax		;record DOS error code for getreg
	stc				;indicate error
flush9:	pop	edx
	pop	ecx
	pop	eax
	pop	ebx			;restore registers
	ret

;-------------------------------------------------------------------------------
;Close output file set up by FSET, intrinsic 24.
;
dcloso:	cmp	otbflg, 'O'		;(big) buffered output?
	jne	short dclos2		;skip if not
	push	ebx			;save registers
	mov	bx, dotptr		;get pointer
	call	flushb			;output the buffer
	pop	ebx			;restore register
dclos2:	clc				;no errors
	ret

;-------------------------------------------------------------------------------
;Reset input buffer pointer.
; ax is destroyed.
;
resdib:	mov	ax, dinsiz
	mov	dinptr, ax
	mov	dinend, ax
	ret

;-------------------------------------------------------------------------------
;Reset output buffer pointer.
;
resdob:	and	dotptr, 0		;reset pointer
	clc				;flag no error
	ret

;-------------------------------------------------------------------------------
;Routine to call a DOS function. This saves the interrupt vector and the
; function number in the register table so that if an error occurs, GETREG
; (intrinsic 35) can be used to determine additional information about
; what caused the error.
;
doscal:	mov	dosint, 21h		;record current interrupt
	mov	dosfun, ah		;record function number

	push	eax
	push	ebx
	push	ecx
	push	edi
	mov	regs_di, di
	mov	edi, offset regs_edi ;point to regs
	mov	[regs_ax-regs_edi+edi], ax ;copy registers into regs
	mov	[regs_bx-regs_edi+edi], bx
	mov	[regs_cx-regs_edi+edi], cx
	mov	[regs_dx-regs_edi+edi], dx
	mov	[regs_si-regs_edi+edi], si
	mov	[regs_bp-regs_edi+edi], bp
	mov	[regs_ds-regs_edi+edi], dseg ;reason that int 21h won't work
	mov	[regs_es-regs_edi+edi], dseg ;set it to something
dosCal5:
	xor	ecx, ecx		;no parameters on stack
	mov	[regs_ss-regs_edi+edi], cx ;SS:SP = 0, PMODE will provide stack
	mov	[regs_sp-regs_edi+edi], cx
	mov	bx, 21h			;call interrupt 21h
	mov	ax, 300h		;int 31h function 0300h
	int	31h			;do the call to real mode
	pop	edi
	pop	ecx
	pop	ebx
	pop	eax

;if the carry flag is set at this point then function 300h failed, otherwise...
	push	regs_flags		;set flags for simulated interrupt
	popf
	sti
	mov	ax, regs_ax		;get value returned by int 21h
	ret

;-------------------------------------------------------------------------------
;Alternate dosCal that reaches buffers that might be more than 64K beyond dseg.
; Inputs edx which is the offset to the buffer.
;
dosCalD:mov	dosint, 21h		;record current interrupt
	mov	dosfun, ah		;record function number

	push	eax
	push	ebx
	push	ecx
	push	edi
;	mov	regs_di, di		;not used
	mov	edi, offset regs_edi	;point to regs
	mov	[regs_ax-regs_edi+edi], ax ;copy registers into regs
	mov	[regs_bx-regs_edi+edi], bx
	mov	[regs_cx-regs_edi+edi], cx
;	mov	[regs_si-regs_edi+edi], si ;not used
;	mov	[regs_bp-regs_edi+edi], bp
	mov	eax, edx		;the offset can be greater than 64K if
	push	eax			; the XPL code has large constant arrays
	and	ax, 000Fh		;since the BIOS call can't handle an
	mov	[regs_dx-regs_edi+edi], ax ; offset > 64K we must read just the
	pop	eax			; segment address
	shr	eax, 4
	add	ax, dseg
	mov	[regs_ds-regs_edi+edi], ax ;reason a simple int 21h doesn't work
;	mov	[regs_es-regs_edi+edi], dseg ;not used
	jmp	dosCal5			;jump to common code

;===============================================================================
;Input a byte from the serial port (com1) and return it in al.
; ah is destroyed.
;
serin:	push	edx			;save edx

	mov	dl, ah			;get extended device number
	mov	dh, 0
seri10:	mov	ah, 02h			;serial input function
	int	14h			;BIOS serial port system call
	test	ah, ah			;successful?
	js	short seri10		;loop if not - don't time out

	pop	edx			;restore edx
	clc				;indicate success
	ret

;-------------------------------------------------------------------------------
;Output byte in al to serial port (com1).
;
serout:	push	eax
	push	edx			;save registers

	mov	dl, ah			;get extended device number
	mov	dh, 0
	mov	ah, 01h			;serial output function
	int	14h			;BIOS serial port system call

	pop	edx
	pop	eax			;restore registers
	clc				;indicate success
	ret

;===============================================================================
;Pseudo device handler (device 8).
; The pseudo device is a circular memory buffer which can be read from or
; written to. It is used to simplify string parsing and number conversion.
;
dseg	segment	para public use32 'data'
	align	4
	extrn	psusiz:abs
psired	dw	0			;read pointer
psiwrt	dw	0			;write pointer
psistr	dw	0			;start pointer
dseg	ends

;-------------------------------------------------------------------------------
;Open pseudo device for output.
;
psopno:	mov	psiwrt, 0		;zero write pointer
	mov	psistr, 0		;zero start pointer
					;(pfall) also open input...
;-------------------------------------------------------------------------------
;Open pseudo device for input.
;
psopni:	push	psistr			;set read pointer to start
	pop	psired
	clc				;indicate no errors
	ret

;-------------------------------------------------------------------------------
;Input a byte from pseudo device and return it in al.
;
psin:	push	ebx			;save register

	xor	ebx, ebx
	mov	bx, psired		;get read pointer
	cmp	bx, psiwrt		;read=write if empty
	jne	short psin1		;skip if more to read
	mov	al, eof			;signal end of file
	jmp	short psin2
psin1:
	mov	al, psubuf[ebx]		;read byte
	inc	psired			;advance pointer
	cmp	psired, psusiz		;wrap around?
	jne	short psin2		;skip if not
	mov	psired, 0		;start over
psin2:
	pop	ebx			;restore register
	clc				;no errors
	ret

;-------------------------------------------------------------------------------
;Output the byte in al to the pseudo device.
;
psout:	push	ebx			;save register

	xor	ebx, ebx
	mov	bx, psiwrt		;get write pointer
	mov	psubuf[ebx], al		;store char in buffer

;increment write pointer and handle wrap around and collisions
	inc	psiwrt			;increment write pointer
	cmp	psiwrt, psusiz		;wrap around?
	jb	short incwt1		;skip if not
	mov	psiwrt, 0		;otherwise start over
incwt1:
	mov	bx, psiwrt		;caught start pointer?
	cmp	bx, psistr
	jne	short incwt2		;skip if not
	inc	psistr			;increment start pointer
	cmp	psistr, psusiz		;wrap around?
	jb	short incwt2		;skip if not
	mov	psistr, 0		;start over
incwt2:
	cmp	bx, psired		;caught read pointer?
	jne	short incwt3		;skip if not
	inc	psired			;increment read pointer
	cmp	psired, psusiz		;wrap around?
	jb	short incwt3		;skip if not
	mov	psired, 0		;start over
incwt3:
	pop	ebx			;restore register
	clc
	ret

cseg	ends

;===============================================================================

exe_stack segment para stack use16 'stack'
	  db	stacklen dup(?)
exe_stack ends

	end	start
