#1 Some optimizations

Merged
pgimeno merged 2 commits from uniabis/p1 into pgimeno/master 3 years ago
1 changed file with 65 additions and 55 deletions
  1. 65 55
      z80unlze.asm

+ 65 - 55
z80unlze.asm

@@ -1,4 +1,4 @@
-; Decompression routine for LZEXE-compressed streams, v1.1
+; Decompression routine for LZEXE-compressed streams, v1.1p1
 ;
 ; Copyright © 2020 Pedro Gimeno Fortea
 ;
@@ -14,20 +14,23 @@
 ; Clobbers: AF, AF', BC
 
 ; Changes:
-;  1.1: Very tiny optimization; also optimizing for size saves 10 more bytes
-;  1.0: Original version
+;  1.1p1 137/ 92 bytes 11-06-2020 : Moved getbit_routine to bottom, some optimizations by uniabis.
+;  1.1   156/112 bytes 10-06-2020 : Very tiny optimization; also optimizing for size saves 10 more bytes
+;  1.0   157/122 bytes 09-06-2020 : Original version
 
 ; Set OPTIMIZE to either SPEED or SIZE.
 ; Optimizing for speed makes the getbit code be inlined, at the cost of
-; decompressor memory (156 bytes as of this writing).
-; Optimizing for size (112 bytes as of this writing) makes the code a bit
+; decompressor memory (137 bytes as of this writing).
+; Optimizing for size ( 92 bytes as of this writing) makes the code a bit
 ; slower due to the calls to getbit and the relative jumps.
 
 SIZE		equ	0
 SPEED		equ	1
 
 ; Change this to select what to optimize for
+		IF !defined OPTIMIZE
 OPTIMIZE	equ	SPEED
+		ENDIF
 
 
 getbit_code	macro
@@ -42,6 +45,7 @@ getbit_code	macro
 		jp	nz,gotbit
 		ELSE
 		jr	nz,gotbit
+initbits:
 		ENDIF
 		ld	c,(hl)
 		inc	hl
@@ -52,7 +56,9 @@ gotbit:
 		ex	af,af'
 		endm
 
-		IF OPTIMIZE=SPEED
+unlze		proc
+
+		IF	OPTIMIZE=SPEED
 
 		; Optimize for speed
 
@@ -72,8 +78,7 @@ jc		macro	x,y
 
 		; Optimize for size
 
-getbit_routine:	getbit_code
-		ret
+		local	getbit_routine
 
 getbit		macro
 		call	getbit_routine
@@ -89,21 +94,32 @@ jc		macro	x,y
 
 		ENDIF
 
-unlze		proc
-
+		local	one_literal
 		local	mainloop
 		local	not_literal
+		local	copy_match
 		local	long_match
 		local	got_length
 		local	clean_up_and_loop
 		local	clean_up_and_ret
+		local	initbits
 
-		ld	a,16
-		ex	af,af'
+		IF	OPTIMIZE=SPEED
 		ld	c,(hl)
 		inc	hl
 		ld	b,(hl)
 		inc	hl
+		ld	a,16
+		ex	af,af'
+		ELSE
+		call	initbits
+		ENDIF
+
+		jr	mainloop
+
+		; 1 = copy next byte verbatim
+one_literal:	inc	bc
+		ldi
 
 		; Bitstream codes:
 		; 1 = copy next byte verbatim
@@ -124,84 +140,78 @@ unlze		proc
 		;        more, the actual length is the code + 1.
 
 mainloop:	getbit
-		jc	nc,not_literal
-		; 1 = copy next byte verbatim
-		ld	a,(hl)
-		inc	hl
-		ld	(de),a
-		inc	de
-		j	mainloop
+		jc	c,one_literal
 
 not_literal:	getbit
 		jc	c,long_match
 
 		; Short match
+		xor	a
 		getbit
 		rla
 		getbit
 		rla
-		and	3
-		add	a,2	; Length
 
 		push	bc
-		ld	c,a
-		ld	a,(hl)	; Offset in two's complement (always negative)
-		inc	hl
 		push	hl
-		ld	b,0	; Length in BC
-		add	a,e	; HL = DE + offset (offset is always negative)
-		ld	l,a
-		ld	a,-1
-		adc	a,d
-		ld	h,a
+
+		ld	l,(hl)	; Offset in two's complement (always negative)
+		ld	h,-1
+
+
+		; Length-2 in A
+		; Offset in HL
+copy_match:	add	hl,de	; HL = DE + offset (offset is always negative)
+		inc	a
+		ld	c,a
+		ld	b,0
 		ldir
+		ldi
 		pop	hl
+
+clean_up_and_loop:
+		inc	hl
 		pop	bc
 		j	mainloop
 
+clean_up_and_ret:
+		inc	hl
+		pop	bc
+		ret
+
 long_match:	push	bc
 		ld	c,(hl)
 		inc	hl
-		ld	b,(hl)
-		ld	a,b
+		ld	a,(hl)
+		ld	b,a
 		; Carry is assumed to be set here! (saves 1 cycle)
 		rr	b
 		sra	b
 		sra	b
 
-		inc	hl
 		and	7
 		jc	nz,got_length
-		ld	a,(hl)
 		inc	hl
-
+		or	(hl)
 		; Check special codes
-		cp	1
-		jc	z,clean_up_and_loop	; equal to 1, ignore
-		jc	c,clean_up_and_ret	; less than 1, must be 0, exit
+		jr	z,clean_up_and_ret	; length code is 0, exit
+
+		dec	a
+		jr	z,clean_up_and_loop	; equal to 1, ignore
 		; Actual length is A + 1, but we add 2 when falling through
 		; so we compensate here:
-		dec	a
 		; Fall through
 
-got_length:	; Apply offset
+got_length:	; Offset in BC
 		push	hl
-		ld	h,d
-		ld	l,e
-		add	hl,bc	; HL = DE + offset (negative)
-		ld	c,a
-		ld	b,0
-		inc	c	; This one never overflows so inc only C
-		inc	bc	; This one could overflow so inc as a pair
-		ldir
-		pop	hl
-
-clean_up_and_loop:
-		pop	bc
-		j	mainloop
+		ld	l,c
+		ld	h,b
+		j	copy_match
 
-clean_up_and_ret:
-		pop	bc
+		IF	OPTIMIZE=SPEED
+		ELSE
+getbit_routine:	getbit_code
 		ret
+		ENDIF
 
 		endp