--- a/z80unlze.asm
+++ b/z80unlze.asm
@@ -1,4 +1,4 @@
 
				-; Decompression routine for LZEXE-compressed streams, v1.1
			
 
				+; Decompression routine for LZEXE-compressed streams, v1.1p1
			
 
				 ;
			
 
				 ; Copyright © 2020 Pedro Gimeno Fortea
			
 
				 ;
			
@@ -14,20 +14,23 @@
 
				 ; Clobbers: AF, AF', BC
			
 
				 
			
 
				 ; Changes:
			
 
				-;  1.1: Very tiny optimization; also optimizing for size saves 10 more bytes
			
 
				-;  1.0: Original version
			
 
				+;  1.1p1 137/ 92 bytes 11-06-2020 : Moved getbit_routine to bottom, some optimizations by uniabis.
			
 
				+;  1.1   156/112 bytes 10-06-2020 : Very tiny optimization; also optimizing for size saves 10 more bytes
			
 
				+;  1.0   157/122 bytes 09-06-2020 : Original version
			
 
				 
			
 
				 ; Set OPTIMIZE to either SPEED or SIZE.
			
 
				 ; Optimizing for speed makes the getbit code be inlined, at the cost of
			
 
				-; decompressor memory (156 bytes as of this writing).
			
 
				-; Optimizing for size (112 bytes as of this writing) makes the code a bit
			
 
				+; decompressor memory (137 bytes as of this writing).
			
 
				+; Optimizing for size (92 bytes as of this writing) makes the code a bit
			
 
				 ; slower due to the calls to getbit and the relative jumps.
			
 
				 
			
 
				 SIZE		equ	0
			
 
				 SPEED		equ	1
			
 
				 
			
 
				 ; Change this to select what to optimize for
			
 
				+		IF !defined OPTIMIZE
			
 
				 OPTIMIZE	equ	SPEED
			
 
				+		ENDIF
			
 
				 
			
 
				 
			
 
				 getbit_code	macro
			
@@ -42,6 +45,7 @@ getbit_code	macro
 
				 		jp	nz,gotbit
			
 
				 		ELSE
			
 
				 		jr	nz,gotbit
			
 
				+initbits:
			
 
				 		ENDIF
			
 
				 		ld	c,(hl)
			
 
				 		inc	hl
			
@@ -52,7 +56,9 @@ gotbit:
 
				 		ex	af,af'
			
 
				 		endm
			
 
				 
			
 
				-		IF OPTIMIZE=SPEED
			
 
				+unlze		proc
			
 
				+
			
 
				+		IF	OPTIMIZE=SPEED
			
 
				 
			
 
				 		; Optimize for speed
			
 
				 
			
@@ -72,8 +78,7 @@ jc		macro	x,y
 
				 
			
 
				 		; Optimize for size
			
 
				 
			
 
				-getbit_routine:	getbit_code
			
 
				-		ret
			
 
				+		local	getbit_routine
			
 
				 
			
 
				 getbit		macro
			
 
				 		call	getbit_routine
			
@@ -89,21 +94,32 @@ jc		macro	x,y
 
				 
			
 
				 		ENDIF
			
 
				 
			
 
				-unlze		proc
			
 
				-
			
 
				+		local	one_literal
			
 
				 		local	mainloop
			
 
				 		local	not_literal
			
 
				+		local	copy_match
			
 
				 		local	long_match
			
 
				 		local	got_length
			
 
				 		local	clean_up_and_loop
			
 
				 		local	clean_up_and_ret
			
 
				+		local	initbits
			
 
				 
			
 
				-		ld	a,16
			
 
				-		ex	af,af'
			
 
				+		IF	OPTIMIZE=SPEED
			
 
				 		ld	c,(hl)
			
 
				 		inc	hl
			
 
				 		ld	b,(hl)
			
 
				 		inc	hl
			
 
				+		ld	a,16
			
 
				+		ex	af,af'
			
 
				+		ELSE
			
 
				+		call	initbits
			
 
				+		ENDIF
			
 
				+
			
 
				+		jr	mainloop
			
 
				+
			
 
				+		; 1 = copy next byte verbatim
			
 
				+one_literal:	inc	bc
			
 
				+		ldi
			
 
				 
			
 
				 		; Bitstream codes:
			
 
				 		; 1 = copy next byte verbatim
			
@@ -124,84 +140,78 @@ unlze		proc
 
				 		;        more, the actual length is the code + 1.
			
 
				 
			
 
				 mainloop:	getbit
			
 
				-		jc	nc,not_literal
			
 
				-		; 1 = copy next byte verbatim
			
 
				-		ld	a,(hl)
			
 
				-		inc	hl
			
 
				-		ld	(de),a
			
 
				-		inc	de
			
 
				-		j	mainloop
			
 
				+		jc	c,one_literal
			
 
				 
			
 
				 not_literal:	getbit
			
 
				 		jc	c,long_match
			
 
				 
			
 
				 		; Short match
			
 
				+		xor	a
			
 
				 		getbit
			
 
				 		rla
			
 
				 		getbit
			
 
				 		rla
			
 
				-		and	3
			
 
				-		add	a,2	; Length
			
 
				 
			
 
				 		push	bc
			
 
				-		ld	c,a
			
 
				-		ld	a,(hl)	; Offset in two's complement (always negative)
			
 
				-		inc	hl
			
 
				 		push	hl
			
 
				-		ld	b,0	; Length in BC
			
 
				-		add	a,e	; HL = DE + offset (offset is always negative)
			
 
				-		ld	l,a
			
 
				-		ld	a,-1
			
 
				-		adc	a,d
			
 
				-		ld	h,a
			
 
				+
			
 
				+		ld	l,(hl)	; Offset in two's complement (always negative)
			
 
				+		ld	h,-1
			
 
				+
			
 
				+
			
 
				+		; On entry to copy_match: A = match length - 2,
			
 
				+		; HL = offset (negative); stack = saved HL, then saved BC
			
 
				+copy_match:	add	hl,de	; HL = DE + offset (offset is always negative)
			
 
				+		inc	a
			
 
				+		ld	c,a
			
 
				+		ld	b,0
			
 
				 		ldir
			
 
				+		ldi
			
 
				 		pop	hl
			
 
				+
			
 
				+clean_up_and_loop:
			
 
				+		inc	hl
			
 
				 		pop	bc
			
 
				 		j	mainloop
			
 
				 
			
 
				+clean_up_and_ret:
			
 
				+		inc	hl
			
 
				+		pop	bc
			
 
				+		ret
			
 
				+
			
 
				 long_match:	push	bc
			
 
				 		ld	c,(hl)
			
 
				 		inc	hl
			
 
				-		ld	b,(hl)
			
 
				-		ld	a,b
			
 
				+		ld	a,(hl)
			
 
				+		ld	b,a
			
 
				 		; Carry is assumed to be set here! (saves 1 cycle)
			
 
				 		rr	b
			
 
				 		sra	b
			
 
				 		sra	b
			
 
				 
			
 
				-		inc	hl
			
 
				 		and	7
			
 
				 		jc	nz,got_length
			
 
				-		ld	a,(hl)
			
 
				 		inc	hl
			
 
				-
			
 
				+		or	(hl)
			
 
				 		; Check special codes
			
 
				-		cp	1
			
 
				-		jc	z,clean_up_and_loop	; equal to 1, ignore
			
 
				-		jc	c,clean_up_and_ret	; less than 1, must be 0, exit
			
 
				+		jr	z,clean_up_and_ret	; less than 1, must be 0, exit
			
 
				+
			
 
				+		dec	a
			
 
				+		jr	z,clean_up_and_loop	; equal to 1, ignore
			
 
				 		; Actual lenght is A + 1, but we add 2 when falling through
			
 
				 		; so we compensate here:
			
 
				-		dec	a
			
 
				 		; Fall through
			
 
				 
			
 
				-got_length:	; Apply offset
			
 
				+got_length:	; Offset in BC
			
 
				 		push	hl
			
 
				-		ld	h,d
			
 
				-		ld	l,e
			
 
				-		add	hl,bc	; HL = DE + offset (negative)
			
 
				-		ld	c,a
			
 
				-		ld	b,0
			
 
				-		inc	c	; This one never overflows so inc only C
			
 
				-		inc	bc	; This one could overflow so inc as a pair
			
 
				-		ldir
			
 
				-		pop	hl
			
 
				-
			
 
				-clean_up_and_loop:
			
 
				-		pop	bc
			
 
				-		j	mainloop
			
 
				+		ld	l,c
			
 
				+		ld	h,b
			
 
				+		j	copy_match
			
 
				 
			
 
				-clean_up_and_ret:
			
 
				-		pop	bc
			
 
				+		IF	OPTIMIZE=SPEED
			
 
				+		ELSE
			
 
				+getbit_routine:	getbit_code
			
 
				 		ret
			
 
				+		ENDIF
			
 
				 
			
 
				 		endp