; "AcidPlot" - Plotter (ANTIC $0F | Narrow 256x192 | 1-bit)
; Done by Krystone in 2025/2026 in MAD-Assembler

; Build switches: a feature is compiled in while its .def line is active.
; Put a ';' in front of a line to leave that feature out.
.def INCLUDE_MUSIC			; music player enabled (comment out to exclude)
.def INCLUDE_PMG_GRAPHICS	; PMG graphics enabled (comment out to exclude)
.def PETAL_COUNT_LUT		; petal count taken from LUT (comment out to use a masked random value)
;.def INCLUDE_LFSR			; uncomment to include LFSR RNG

	icl 'icl/Atari.asm'

; Fixed memory map. These are plain equates: the assembler does not check
; that the code assembled from $2100 stays below pmg_mem.
; NOTE(review): confirm the code really ends before $3000, init_pmg fills
; $3000-$37FF at start-up.
dl_mem	  = $2000			; display list in RAM (207 bytes as built; written with an 8-bit index, so it must fit in one page)
pmg_mem   = $3000			; 2K aligned PMG base
sin_table = $3800			; 256-byte sine table in RAM
gfx_mem	  = $4000			; graphics memory ($4000-$57FF), 192 rows x 32 bytes
rowtab_lo = $5A00			; 192-byte row address table (lo)
rowtab_hi = rowtab_lo+192	; 192-byte row address table (hi)

; Text buffers. build_display_list shows two text rows starting at each
; buffer, so the second top row is the same memory as screen_text_bottom.
screen_text_top = $5B80		; top text buffer (might not be zeroed)
screen_text_bottom = screen_text_top + 32	; bottom text buffer (might not be zeroed)

pcolr0s   = $02C0			; OS shadow registers for player colors
pcolr1s   = $02C1			; copied to GTIA each VBI
pcolr2s   = $02C2
pcolr3s   = $02C3

	org $80					; Zero page variables

adr			.ds 2			; 16-bit scratch pointer (table builders, fills, put_pixel row address)
tmpX		.ds 1			; put_pixel: saved X coordinate
oldY		.ds 1			; put_pixel: saved Y coordinate, then reused as the bit-mask temp
pixelColor	.ds 1			; 0 = clear pixel, 1 = set pixel
maskXor		.ds 1			; $00 for set, $FF for clear (xor with bit mask)
seedLo		.ds 1			; RNG seed (low), only used with INCLUDE_LFSR
seedHi		.ds 1			; RNG seed (high), only used with INCLUDE_LFSR
counter		.ds 1			; counter (general purpose)
			
theta       .ds 1			; main angle
theta_k     .ds 1			; k * theta accumulator
theta_k_i   .ds 1			; inner phase
k_step      .ds 1			; petal count control
inner_cnt   .ds 1			; samples left in the current inner loop
petal_count .ds 1			; number of petals (currently used)

; State of the music player (consumed by the code in icl/PlayLZSS.asm)
.ifdef INCLUDE_MUSIC
msx_chn_copy		.ds 9
msx_chn_pos			.ds 9
msx_bptr			.ds 2
msx_cur_pos			.ds 1
msx_chn_bits		.ds 1
msx_bit_data		.ds 1
msx_song_end_ptr	.ds 2	; end address of current song
msx_song_start_ptr	.ds 2	; start address of current song
msx_song_mask_ptr	.ds 2	; pointer to current song's channel mask byte
.endif

; Pixel bit masks, one per X position inside a byte (index 0 = leftmost
; pixel = bit 7). Assembled right after the zero-page variables so that
; put_pixel can read them with zero-page indexed addressing.
.local scr					; ZP on purpose
masks8
	dta %10000000,%01000000,%00100000,%00010000,%00001000,%00000100,%00000010,%00000001
.endl

	org $2100 ; code and data start one page above the display list at $2000

; Author signature; not referenced by any code in this file.
krystone
	dta c'k2o26'			; Signature or... free bytes
;	dta $00, c'kRyStoNe'	; Signature or... free bytes

; Quarter-wave source table: entries 0..63 of a 256-step sine with centre 0
; and amplitude 96. build_sin_table mirrors and negates it into sin_table.
; NOTE(review): draw_petal_rose uses 96 + sine as the row number and the row
; table has 192 entries (rows 0..191). Confirm the assembler never emits +96
; here (entry 63 is about 95.97 before conversion to a byte), otherwise row
; 192 would be looked up past the end of rowtab_lo/rowtab_hi.
.local sin64 ; https://mads.atari8.info/mads.html#_sin
	dta b(sin(0,96,256,0,63)) 
.endl

; Hand-picked petal counts (25 entries). main picks one at random per rose
; and passes it to draw_petal_rose in A.
.ifdef PETAL_COUNT_LUT
.local nice_petal_counts_25
	dta $0C, $11, $14, $1F, $29, $2A, $2F, $3F, $40, $4A, $53, $57
	dta $5B, $68, $71, $75, $76, $7F, $81, $92, $96, $98, $9F, $A1, $BF
.endl
.endif

.ifdef INCLUDE_MUSIC
; Compressed song, exactly one 'ins' line active at a time.
; main treats the first byte as the channel mask (msx_song_mask_ptr) and the
; stream itself as starting at msx_song_data+1 (msx_song_start_ptr).
; The "N x VBI" notes give the number of player calls per frame a song needs;
; vbi_music has to be adjusted to match when a different song is selected.
msx_song_data
  ;ins './msx/Something4xVBI-SS004.lz16' 	; 4 x VBI - 196 bytes
  ;ins './msx/Something4xVBI-SS005.lz16' 	; 4 x VBI - 153 bytes
  ;ins './msx/Something1xVBI-SS009.lz16' 	; 1 x VBI - 149 bytes
  ;ins './msx/Something2xVBI-SS006.lz16' 	; 2 x VBI - 140 bytes
  ins './msx/Something1xVBI-SS008.lz16' 	; 1 x VBI -  80 bytes - seems better than SS007
  ;ins './msx/Something1xVBI-SS007.lz16' 	; 1 x VBI -  78 bytes
  ;ins './msx/Something1xVBI-SS010.lz16' 	; 1 x VBI -  76 bytes
  ;ins './msx/Something1xVBI-SS011.lz16' 	; 1 x VBI -  76 bytes - C#2, C#1, C#2, ...
msx_song_end

	icl './icl/PlayLZSS.asm'	; player routines (msx_init, msx_play_frame, msx_restart_if_end)

	.align 256
msx_buffers						;  Music buffers (256 x 9 = 2304 bytes), page aligned
	.ds 256*9

; vbi_music - deferred VBI handler (installed in VVBLKD by main).
; Plays one frame of music per call and restarts the song when it has ended.
; A, X and Y are not saved here: the OS NMI entry code has already pushed
; them, and XITVBV pulls them back before its RTI, so the handler may
; clobber all three freely.
.proc vbi_music ; Deferred VBI: runs once per frame
	jsr msx_play_frame
	jsr msx_restart_if_end		; + 1 x VBI
	; For songs marked 2x/3x/4x VBI enable one extra pair per additional call:
	;jsr msx_play_frame
	;jsr msx_restart_if_end		; + 1 x VBI
	;jsr msx_play_frame
	;jsr msx_restart_if_end		; + 1 x VBI
	;jsr msx_play_frame 
	;jsr msx_restart_if_end		; + 1 x VBI
	jmp xitvbv					; OS exit: restores A/X/Y and returns from the NMI
.endp
.endif

; main - program entry point (never returns).
; Switches the screen off, starts the music (optional), sets the colours,
; builds the lookup tables and the display list, then loops forever:
; pick a petal count, draw the rose, erase the same rose, repeat.
.proc main 						; program entry point
	lda #0						; A is undefined on entry, load it explicitly
	sta 559						; SDMCTL = 0: screen off
.ifdef INCLUDE_MUSIC
os_setvbv = $E45C				; OS SETVBV: A = 7 selects the deferred VBI vector, X/Y = handler hi/lo

	; Set up the player completely BEFORE the VBI is allowed to call it,
	; otherwise a VBI hitting in the middle would run msx_play_frame on
	; uninitialised state.
	; NOTE(review): assumes msx_init does not itself need the VBI handler
	; to be installed - confirm against icl/PlayLZSS.asm.
	mwa #msx_song_end msx_song_end_ptr			; set end pointer for first song
	mwa #msx_song_data+1 msx_song_start_ptr		; set start pointer for first song
	mwa #msx_song_data msx_song_mask_ptr		; set mask pointer to first song
	jsr msx_init

	; Install the music player into the deferred VBI through the OS.
	; NMIEN ($D40E) is write-only, so it cannot be read, saved and restored;
	; SETVBV is the supported way to change the vector without a VBI seeing
	; a half-written address.
	ldy #<vbi_music
	ldx #>vbi_music
	lda #7
	jsr os_setvbv
.endif
	ldx #$22
	stx colbaks
	stx colpf0s
	dex 						; $21
	stx colpf2s
	ldx #$1F
	stx colpf1s 				; pixel color, and char color in color text modes
.ifdef INCLUDE_PMG_GRAPHICS
	jsr init_pmg				; initialize PMG graphics
.endif
	jsr build_row_table			; precompute row addresses (Y*32) into RAM
	jsr build_sin_table			; build full sine table from quarter sine
	jsr build_display_list		; build display list in RAM

	mwa #dl_mem 560				; SDLSTL ($0230)
	
	lda #$7c					; place pipe characters on screen (decoration)
	sta screen_text_top+0		; first and last column of the topmost text row
	sta screen_text_top+31
	sta screen_text_bottom+32+0	; first and last column of the lowest text row
	sta screen_text_bottom+32+31

	mva #scr32 559 				; Narrow playfield + DMA + Players + Missiles + Line 1x

; -----------------------------------------------------------------------------

	; TODO: Fix display list, text buffers might overlap - need to check
	; TODO: Reorganize memory to try to fit it into 16K Atari

samples_per_angle = 35 ; 0 is nice - full dots

lo0p:
	jsr draw_direction			; flip the drawing direction for this rose
.ifdef INCLUDE_LFSR
	lda random				; reseed the LFSR from POKEY before every rose
	sta seedLo
	lda random
	sta seedHi
	jsr next_random_number	; A = random byte
.else
	lda random				; A = random byte straight from POKEY
.endif
.ifdef PETAL_COUNT_LUT
	and #$1F				; 0-31
	cmp #25
	bcc pc_idx_ok
	sbc #25					; wrap 25-31 back to 0-6 (carry is already set by the CMP)
pc_idx_ok:
	tax
	lda nice_petal_counts_25,x
	sta petal_count
.else
	; A still holds the random byte obtained above
	and #$6F 				; clear bits 4 and 7: sparse values within 0..111
	clc
	adc #9					; avoid too low petal counts (result 9..120)
	sta petal_count
.endif

	lda #1					; pass 1: draw the rose
	jsr set_pixel_color
	lda petal_count
	jsr draw_petal_rose 	; expects parameter in A

	lda #0
	sta $4D					; prevent Attract mode
	jsr set_pixel_color		; pass 2: erase the same rose (A is still 0)
	lda petal_count
	jsr draw_petal_rose		; expects parameter in A

	jmp lo0p
.endp

; draw_direction - toggle the direction in which draw_petal_rose sweeps.
; Works by patching draw_petal_rose:
;   op_theta_k_i, op_theta : INC zp ($E6) <-> DEC zp ($C6)
;   op_theta_k             : ADC zp ($65) <-> SBC zp ($E5)
;   init_value+1           : operand of the LDA # that seeds theta/theta_k,
;                            $00 for forward, $FF for reverse
; The current direction is read back from the opcode at op_theta_k_i.
; In: nothing.  Out: A, X, Y clobbered.

.proc draw_direction
	lda draw_petal_rose.op_theta_k_i
	cmp #$E6				; currently INC = forward?
	beq reverse				; yes: switch to reverse
forward:
	lda #$E6				; INC opcode
	ldx #$65				; ADC opcode
	ldy #$00				; Initial value for forward
	beq apply   			; always taken: LDY #$00 has just set Z
reverse:
	lda #$C6				; DEC opcode
	ldx #$E5				; SBC opcode
	ldy #$FF				; Initial value for reverse
apply:
	sta draw_petal_rose.op_theta_k_i
	sta draw_petal_rose.op_theta
	stx draw_petal_rose.op_theta_k
	sty draw_petal_rose.init_value+1	; +1: patch the operand, not the LDA opcode
	rts
.endp

; draw_petal_rose - plot one rose curve through put_pixel.
; In:  A = petal control value (stored in k_step).
; Out: A, X, Y clobbered.
; Pixels are set or cleared depending on the last set_pixel_color call.
; The labels init_value, op_theta_k_i, op_theta and op_theta_k are patch
; points for draw_direction; each label marks the first byte (the opcode)
; of the instruction that follows it.
.proc draw_petal_rose
	; (the value used to be hard-coded with LDA #3; it now arrives in A)
	sta k_step
init_value:
	lda #0					; SMC: operand (the byte at init_value+1) is meant to be #0 (forward) or #$FF (reverse)
	sta theta
	sta theta_k
outer_loop:
	; prepare inner phase
	lda theta_k
	sta theta_k_i
	lda #main.samples_per_angle	; points plotted per theta step (0 would mean 256)
	sta inner_cnt
inner_loop:
	lda theta
	clc
	adc theta_k_i			; 8-bit sum, wraps around the 256-entry table
	tay
	lda sin_table,y		
	clc
	adc #128
	tax						; X = 128 + sin_table[theta + theta_k_i]
	ldy theta_k_i 			
	lda sin_table,y
	clc
	adc #96
	tay						; Y = 96 + sin_table[theta_k_i]
	jsr put_pixel   		; destroys A,X,Y
op_theta_k_i:
	inc theta_k_i			; SMC: patched to INC ($E6) or DEC ($C6)
	dec inner_cnt
	bne inner_loop
	; advance base angles
op_theta:
	inc theta				; SMC: patched to INC ($E6) or DEC ($C6)
	lda theta_k
	clc						; note: when the next opcode is SBC, a clear carry subtracts one extra (k_step+1)
op_theta_k:
	adc k_step				; SMC: patched to ADC ($65) or SBC ($E5)
	sta theta_k
	lda theta
	bne outer_loop			; finished once theta is back at 0
	rts
.endp

; build_display_list - write the ANTIC display list to dl_mem.
; Layout (240 scan lines in total, no blank lines):
;   mode 5 + LMS screen_text_top      top text row
;   mode 2                            second text row (continues in memory)
;   mode F + LMS gfx_mem, 127 x F     graphics rows 0..127
;   mode F + LMS gfx_mem+$1000, 63 x F  rows 128..191 (new LMS because the
;                                     bitmap reaches a 4K boundary here)
;   mode 2 + LMS screen_text_bottom   text row
;   mode 5                            lowest text row (continues in memory)
;   JVB dl_mem                        jump back and wait for vertical blank
; In: nothing.  Out: A, X, Y clobbered; adr = dl_mem.
.proc build_display_list
	mwa #dl_mem adr
	ldy #0
	lda #$45 				; mode 5 with LMS
	sta (adr),y
	iny
	lda #<screen_text_top
	sta (adr),y
	iny
	lda #>screen_text_top
	sta (adr),y
	iny
	lda #$02				; mode 2
	sta (adr),y
	iny
	lda #$4F				; mode F with LMS: first graphics row
	sta (adr),y
	iny
	lda #<gfx_mem
	sta (adr),y
	iny
	lda #>gfx_mem
	sta (adr),y
	iny
	lda #$0F
	ldx #127				; rows 1..127
upper_rows:
	sta (adr),y
	iny
	dex
	bne upper_rows
	lda #$4F				; mode F with LMS: row 128 starts the second 4K block
	sta (adr),y
	iny
	lda #<(gfx_mem+$1000)
	sta (adr),y
	iny
	lda #>(gfx_mem+$1000)
	sta (adr),y
	iny
	lda #$0F
	ldx #63					; rows 129..191
lower_rows:
	sta (adr),y
	iny
	dex
	bne lower_rows
	lda #$42 				; mode 2 with LMS for text
	sta (adr),y
	iny
	lda #<screen_text_bottom
	sta (adr),y
	iny
	lda #>screen_text_bottom
	sta (adr),y
	iny
	lda #$05				; mode 5
	sta (adr),y
	iny
	lda #$41 				; JVB (jump and wait for vertical blank)
	sta (adr),y
	iny
	lda #<dl_mem
	sta (adr),y
	iny
	lda #>dl_mem
	sta (adr),y 			; complete the JVB operand: without this byte the jump
							; target depends on stale RAM and only works while
							; the OS VBI reloads the display list pointer
	rts
.endp

; 16-bit Galois LFSR (poly $B400), shift right, mixed with POKEY random.
; $B400 is the standard 16-bit maximal-length Galois LFSR tap mask for
; a right-shift register. It corresponds to the primitive polynomial
; x^16 + x^14 + x^13 + x^11 + 1. The mask lives entirely in the HIGH byte,
; so the $B4 is applied to seedHi. Because POKEY's RANDOM is XORed into the
; state afterwards, the sequence is not a pure LFSR sequence.
; https://en.wikipedia.org/wiki/Linear-feedback_shift_register
;
; In:  seedLo/seedHi = current state.
; Out: A = new seedLo; seedLo/seedHi updated. X and Y are preserved.

.ifdef INCLUDE_LFSR
.proc next_random_number
	lda seedHi
	lsr						; 16-bit shift right: high byte first ...
	ror seedLo				; ... then low byte; the bit shifted out lands in C
	bcc nr_store			; shifted-out bit was 0: no taps
	eor #$B4				; shifted-out bit was 1: apply tap mask $B400
nr_store:
	sta seedHi
nr_mix:
	lda random				; stir in hardware randomness
	eor seedLo
	sta seedLo
	lda random
	eor seedHi
	sta seedHi
	lda seedLo
	rts
.endp
.endif

; build_row_table - fill rowtab_lo/rowtab_hi with the start address of each
; of the 192 bitmap rows: entry y = gfx_mem + 32 * y.
; In: nothing.  Out: A and Y clobbered, adr = address just past the last row.

.proc build_row_table
	lda #<gfx_mem			; adr = address of row 0
	sta adr
	lda #>gfx_mem
	sta adr+1
	ldy #0					; Y = row number
next_row:
	lda adr+1				; record the current row address ...
	sta rowtab_hi,y
	lda adr
	sta rowtab_lo,y
	clc						; ... then step to the next row, 32 bytes further
	adc #32
	sta adr
	scc						; carry clear: still in the same page, skip the INC
	inc adr+1
	iny
	cpy #192
	bne next_row
	rts
.endp

; build_sin_table - expand the 64-entry quarter wave sin64 into the full
; 256-entry signed table sin_table.
;   bit 6 of the angle set (quadrants 1 and 3): quarter table is read backwards
;   bit 7 of the angle set (quadrants 2 and 3): value is negated (two's complement)
; In: nothing.  Out: A, X, Y clobbered.
.proc build_sin_table
	ldy #0					; Y = angle 0..255
next_angle:
	tya
	and #$40
	beq rising				; quadrants 0 and 2 read the quarter table upwards
	tya
	eor #$FF				; quadrants 1 and 3 read it downwards
	jmp fold
rising:
	tya
fold:
	and #$3F				; position inside the quarter table
	tax
	lda sin64,x
	cpy #128
	bcc keep				; first half period: positive lobe, use as is
	eor #$FF				; second half period: negate
	clc
	adc #1
keep:
	sta sin_table,y
	iny
	bne next_angle			; until Y wraps back to 0
	rts
.endp

; set_pixel_color - select whether put_pixel sets or clears pixels.
; In:  A = 0 -> clear pixels, A <> 0 -> set pixels.
;      The decision is taken from A itself, not from whatever the caller's
;      flags happen to be (STA does not update them).
; Out: pixelColor = A as passed in
;      maskXor    = $00 (set) / $FF (clear)
;      put_pixel.op_mask patched to ORA zp $05 (set) / AND zp $25 (clear)
;      A clobbered; X and Y preserved. Assumes decimal mode is off.
.proc set_pixel_color 		; A=0 --> clear pixels, A<>0 --> set pixels
	sta pixelColor
	cmp #1					; C = 0 only for A = 0
	lda #$FF
	adc #0					; A = $FF when clearing (C=0), $00 when setting (C=1)
	sta maskXor
	and #$20				; $20 when clearing, $00 when setting
	ora #$05				; $25 = AND zp, $05 = ORA zp
	sta put_pixel.op_mask
	rts
.endp

; put_pixel - set or clear one pixel of the 256x192 1-bit bitmap.
; In:  X = column 0..255, Y = row 0..191 (not range checked; a row above 191
;      would read past the end of the row tables).
; Out: A, X, Y destroyed; adr, tmpX and oldY overwritten.
;
; - Row address comes from the precomputed rowtab_lo/rowtab_hi tables
; - Bit mask comes from scr.masks8 (no shifting loops)
; - Set/clear is chosen by self-modifying code: set_pixel_color patches the
;   opcode at op_mask and the value of maskXor, so there is no branch here
; - Single-mode optimized (ANTIC $0F narrow, 32 bytes per row)

.proc put_pixel ; and destroy A,X,Y
	stx tmpX				; keep the column for the byte offset below
	lda rowtab_lo,y			; adr = start of row Y
	sta adr
	lda rowtab_hi,y
	sta adr+1
	txa
	and #7					; pixel position inside the byte
	tax
	lda scr.masks8,x
	eor maskXor				; inverted mask when clearing
	sta oldY				; oldY doubles as the mask temp
	lda tmpX
	lsr
	lsr
	lsr
	tay						; Y = column / 8 = byte offset inside the row
	lda (adr),y
op_mask:
	ora oldY				; Self Modifying Code: Patched to ORA zp ($05) or AND zp ($25)
	sta (adr),y
	rts
.endp

.ifdef INCLUDE_PMG_GRAPHICS
; init_pmg - set up the static player/missile decoration.
; Points PMBASE at pmg_mem, enables players and missiles, sets the four
; player colours (through the OS shadows) and the eight horizontal positions,
; then fills the 2K PMG area: the first 1K with %10101010, the second 1K
; with %10000000.
; In: nothing.  Out: A, X, Y clobbered, adr = pmg_mem + $800.
.proc init_pmg
	mva #>pmg_mem $D407		; PMBASE
	mva #$03 $D01D			; GRACTL: enable players & missiles
/*
	lda #0
	sta sizep0
	sta sizep1
	sta sizep2
	sta sizep3
	; Set GTIA priority mode with 0
	sta gtictl
*/
	mva #$1A pcolr0s
	mva #$18 pcolr1s
	mva #$26 pcolr2s
	mva #$24 pcolr3s

	step = 2				; horizontal distance between neighbouring objects
	ppos = 56+step			; position of the first player (player 3)
	mpos = 190+step			; position of the first missile (missile 0)

	mva #ppos hposp3
	mva #ppos+step hposp2
	mva #ppos+step*2 hposp1	
	mva #ppos+step*3 hposp0

	mva #mpos hposm0
	mva #mpos+step hposm1
	mva #mpos+step*2 hposm2
	mva #mpos+step*3 hposm3

	; Fill PMG memory (2K) with ...
	; https://www.atariarchives.org/mapping/appendix7.php

	mwa #pmg_mem adr		; first 1K
	ldx #4
	lda #%10101010
	jsr fill_pages
	
	mwa #pmg_mem+$400 adr	; second 1K
	ldx #4
	lda #%10000000
	; the last fill needs no JSR/RTS pair: execution simply continues into
	; fill_pages, whose RTS returns to init_pmg's caller

fill_pages:				; Fill X pages with pattern in A, starting at adr
	ldy #0
fill:
	sta (adr),y
	iny
	bne fill				; Y wrapped to 0: one page done
	inc adr+1
	dex
	bne fill
	rts
.endp
.endif

; -----------------------------------------------------------------------------

	ini main ; init-address segment: the loader calls main as soon as the blocks above are loaded