That's a lot of handy tips. Special thanks for the explanation of memcpy_w and memcpy_l.
Yes, main transfer is done during vblank.
Yes, I have learned that slDMAWait is unnecessary in this case and will drag the system to a crawl. (A real system!)
I've now got it to where I don't need that much more optimization using slDMA. I will look over SBL, maybe its transfers are faster.
I've got it to where it is clear when listening to the music that it is a cyclic desync. (Unfortunately, it starts out desync'd, but I will get to that later)
I could also restructure my buffer so that it uses fewer copies/DMAs to begin with.
/e: memcpy_w is the proper way to transfer to audio RAM (if that is the method), however it does not seem to work at Vblank. Which is a shame, since the program works perfectly if memcpy_w is the function used for transfer to the playback region.
;************************************************************************
; HOST Command = $85 : PCM stream play start *
; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - *
; P1 : D7 = L ( mono ) / H ( stereo ) *
; D6 = don't care *
; D5 = don't care *
; D4 = L ( 16bit PCM ) / H ( 8bit PCM ) *
; D3 = don't care *
; D2~D0 = PCM Stream# *
; P2 : D7~D5 = Direct level [DISDL] *
; D4~D0 = Direct Pan [DIPAN] *
; P3 : D7~D0 = PCM start address bit19~12 *
; P4 : D7~D0 = // bit11~4 ( bit3~0 = always "0" ) *
; P5,6 : D15~D0 = PCM data sample ( memory use ) size High byte per 1ch*
; P7,8 : D14~D11 = [OCT] *
; D9 ~D0 = [FNS] *
; P9 : D6~D3 = [ISEL] Rch ( stereo/mono ) *
; D2~D0 = [IMXL] Rch ( stereo/mono ) *
; P10 : D6~D3 = [ISEL] Lch ( stereo ) *
; D2~D0 = [IMXL] Lch ( stereo ) *
; P11 : D7~D0 = total level *
;************************************************************************
; Register usage, as visible in this routine:
; a0 = base for the HOST_P1..HOST_P11 parameter fields (only read)
; a5 = base for the SCSP_* slot register offsets written by both paths
; a6 = base for Pcm_Strm, slot_work and SND_OUT_ST
; a2, a3, d0, d1, d4, d5, d7 are overwritten; nothing is saved or restored.
; The entry code below only sets up a2/a3 and dispatches on P1 bit 7:
; bit 7 clear -> branch to drv_85_mono, bit 7 set -> fall through to
; drv_85_stereo. Each path ends with its own rts.
global drv_PCM_st
drv_PCM_st:
lea Pcm_Strm(a6),a3 ; PCM stream# stack area
lea slot_work(a6),a2 ; a2 = slot work area (indexed by slot offset later)
move.b HOST_P1(a0),d7 ; P1 : bit7 H = stereo
bpl drv_85_mono ; N flag clear (bit7 = L) -> mono path; else fall through
;===============================;
; Stereo path: starts the R and L slots of the requested PCM stream.
; Entered by fall-through with d7.b = P1, a2 = slot_work, a3 = Pcm_Strm.
; Sequence: mark both stream entries as playing, key off both slots,
; program address/loop/level/pan/envelope/pitch, then key on both slots.
drv_85_stereo: andi.w #7,d7 ; = PCM Stream#
add.w d7,d7 ; *2 : two bytes per stream entry (R at +0, L at +1)
move.b 0(a3,d7.w),d4 ; R : play flag & slot#
bset.b #7,0(a3,d7.w) ; set bit 7 of the R entry (play flag)
andi.w #$1F,d4 ; keep slot# only (low 5 bits)
lsl.w #6,d4 ; *$40 = R slot work offset
move.b 1(a3,d7.w),d5 ; L : play flag & slot#
bset.b #7,1(a3,d7.w) ; set bit 7 of the L entry (play flag)
andi.w #$1F,d5 ; keep slot# only (low 5 bits)
lsl.w #6,d5 ; *$40 = L slot work offset
;-------------------------------;
; d4.w = ( R ) slot work offset
; d5.w = ( L ) slot work offset
drv_85_st1: move.b HOST_P1(a0),d0 ; P1 : D4 = 8bit / D210 = #
move.b d0,d7 ; keep a copy of P1 for the 8/16-bit flag below
andi.w #7,d0 ; = PCM Stream#
ori.b #$80,d0 ; add play flag (bit 7)
move.b d0,PSPN(a2,d4.w) ; save PCM Stream# & play flag
move.b d0,PSPN(a2,d5.w) ; save PCM Stream# & play flag
lsr.w #1,d4 ; $00,$20,..,$3E0
lsr.w #1,d5 ; same for L : offset used with a5 from here on
move.b #$10,$00(a5,d4.w) ; <<< key off >>>
move.b #$10,$00(a5,d5.w) ; <<< key off >>>
andi.w #00010000B,d7 ; 16/8 bit PCM ?
ori.w #00100000B,d7 ; set Forward loop
swap d7 ; move the two flag bits to bits 20/21 of d7.l
clr.w d7 ; low word = 0 so the or.l below leaves the address bits intact
moveq #0,d0 ; d0.l = 0 (also clears the upper word before the word load below)
move.w d0,SCSP_PCM_LSA(a5,d4.w) ; [LSA] R = 0000
move.w d0,SCSP_PCM_LSA(a5,d5.w) ; [LSA] L = 0000
move.w HOST_P3(a0),d0 ; P3,4 : [SA] upper 16 bits
lsl.l #4,d0 ; *10H
or.l d7,d0 ; merge loop / 8bit flags into the upper word
move.l d0,(a5,d4.w) ; <<< [SA] R >>>
moveq #0,d1 ; d1.l = 0, used as the zero source below
move.b d1,SCSP_SISD(a5,d4.w) ; <<< [TL/MDL] = 0 : R >>>
;@ move.b HOST_P7(a0),SCSP_TLVL(a5,d4.w) ;
move.b HOST_P11(a0),SCSP_TLVL(a5,d4.w) ; P11 : total level -> R
move.w d1,SCSP_MDLSL(a5,d4.w) ; <<< [TL/MDL] = 0 : R >>>
move.b d1,SCSP_SISD(a5,d5.w) ; <<< [TL/MDL] = 0 : L >>>
;@ move.b HOST_P7(a0),SCSP_TLVL(a5,d5.w) ;
move.b HOST_P11(a0),SCSP_TLVL(a5,d5.w) ; P11 : total level -> L
move.w d1,SCSP_MDLSL(a5,d5.w) ; <<< [TL/MDL] = 0 : L >>>
move.w d1,SCSP_RELFO(a5,d4.w) ; <<< [LFO] : R >>>
move.w d1,SCSP_RELFO(a5,d5.w) ; <<< [LFO] : L >>>
move.w HOST_P5(a0),d1 ; = 1,2,...,FFFF,0000
subq.w #1,d1 ; = 0,1,...,FFFE,FFFF
move.w d1,SCSP_PCM_LEA(a5,d4.w) ; <<< [LEA] R >>>
move.w d1,SCSP_PCM_LEA(a5,d5.w) ; <<< [LEA] L >>>
addq.l #1,d1 ; = 1,2,...,FFFF,10000
; d0.l = [LPCTL][8BIT][SA] R
; d1.l = PCM sample size / slot
; L start address = R start address + size (8bit) or + 2*size (16bit)
add.l d1,d0 ; + data sample size
btst.l #20,d7 ; 8/16 bit ?
bne.s drv_85_1 ; jump if 8bit data
add.l d1,d0 ; + data sample size
drv_85_1: move.l d0,(a5,d5.w) ; <<< [SA] L >>>
move.b HOST_P2(a0),d0 ; P2 : [DISDL],[DIPAN]
andi.b #$E0,d0 ; keep [DISDL] (bits 7..5), drop the host pan
ori.b #$0F,d0 ; Pan = [R]
; == support for monaural TV ==
btst.b #7,SND_OUT_ST(a6) ; MONO/STEREO status
beq.s drv_85_3ST ; jump if STEREO mode
andi.b #$F0,d0 ; set Center
drv_85_3ST: move.b d0,SCSP_DISDLPN(a5,d4.w) ; <<< [DISDL],[DIPAN] R >>>
add.w d4,d4 ; temporarily back to the slot work offset (*2)
move.b d0,sl_DISDLPAN(a2,d4.w) ; keep a copy of R [DISDL],[DIPAN] in slot work
lsr.w #1,d4 ; restore the a5-relative offset
ori.b #$10,d0 ; set Pan = [L]
move.b d0,SCSP_DISDLPN(a5,d5.w) ; <<< [DISDL],[DIPAN] L >>>
add.w d5,d5 ; temporarily back to the slot work offset (*2)
move.b d0,sl_DISDLPAN(a2,d5.w) ; keep a copy of L [DISDL],[DIPAN] in slot work
lsr.w #1,d5 ; restore the a5-relative offset
move.l #$001F001F,d0 ; AR,RR = 1FH
move.l d0,$08(a5,d4.w) ; <<< [D2R][D1R][EHLD][AR] >>>
move.l d0,$08(a5,d5.w) ; <<< [D2R][D1R][EHLD][AR] >>>
move.b HOST_P9(a0),d0 ; P9 : [ISEL],[IMXL] for Rch
move.b d0,$15(a5,d4.w) ; <<< [ISEL],[IMXL] : R >>>
move.b HOST_P10(a0),d0 ; P10 : [ISEL],[IMXL] for Lch
move.b d0,$15(a5,d5.w) ; <<< [ISEL],[IMXL] : L >>>
move.w HOST_P7(a0),d0 ; P7,8 : [OCT],[FNS], same value for both channels
move.w d0,$10(a5,d4.w) ; <<< [OCT],[FNS] : R >>>
move.w d0,$10(a5,d5.w) ; <<< [OCT],[FNS] : L >>>
move.b #$08,$00(a5,d4.w) ; <<< key-on preparation >>>
move.b #$18,$00(a5,d5.w) ; <<< simultaneous key on >>>
rts
;===============================;
; Mono path: starts only the R slot of the requested PCM stream; the L
; entry of the stream is marked as not playing and its slot is keyed off.
; Entered from drv_PCM_st with a2 = slot_work, a3 = Pcm_Strm.
; d7.b = P1:mode
drv_85_mono: andi.w #7,d7 ; = PCM Stream#
add.w d7,d7 ; *2 : two bytes per stream entry (R at +0, L at +1)
move.b 0(a3,d7.w),d4 ; R : play flag & slot#
bset.b #7,0(a3,d7.w) ; set bit 7 of the R entry (play flag)
andi.w #$1F,d4 ; keep slot# only (low 5 bits)
lsl.w #6,d4 ; *$40 = R slot work offset
move.b 1(a3,d7.w),d5 ; L : play flag & slot#
bclr.b #7,1(a3,d7.w) ; clear bit 7 of the L entry (L is not used in mono)
andi.w #$1F,d5 ; keep slot# only (low 5 bits)
lsl.w #6,d5 ; *$40 = L slot work offset
;-------------------------------;
; d4.w = ( R ) slot work offset
; d5.w = ( L ) slot work offset
drv_85_mn1: move.b HOST_P1(a0),d0 ; P1 : D4 = 8bit / D210 = #
move.b d0,d7 ; keep a copy of P1 for the 8/16-bit flag below
andi.w #7,d0 ; = PCM Stream#
ori.b #$80,d0 ; add play flag (bit 7)
move.b d0,PSPN(a2,d4.w) ; save PCM Stream# & play flag
; NOTE(review): the value loaded into d0 on the next line is overwritten
; by "moveq #0,d0" below before anything reads it in this path, so the
; load looks unused here - confirm before removing.
move.b PSPN(a2,d5.w),d0 ; L component
move.b #0,PSPN(a2,d5.w) ; clear PCM Stream# & play flag
lsr.w #1,d4 ; $00,$20,..,$3E0
lsr.w #1,d5 ; same for L : offset used with a5 from here on
move.b #$10,$00(a5,d4.w) ; <<< key off >>>
move.b #$10,$00(a5,d5.w) ; <<< key off >>>
andi.w #00010000B,d7 ; 16/8 bit PCM ?
ori.w #00100000B,d7 ; set forward loop
swap d7 ; move the two flag bits to bits 20/21 of d7.l
clr.w d7 ; low word = 0 so the or.l below leaves the address bits intact
moveq #0,d0 ; d0.l = 0 (also clears the upper word before the word load below)
move.w d0,SCSP_PCM_LSA(a5,d4.w) ; <<< [LSA] >>>
move.w HOST_P3(a0),d0 ; P3,4 : [SA] upper 16 bits
lsl.l #4,d0 ; *10H
or.l d7,d0 ; merge loop / 8bit flags into the upper word
move.l d0,(a5,d4.w) ; <<< [SA] >>>
;@ move.l d0,d3 ; stack d3.l = [SA] R
moveq #0,d1 ; d1.l = 0, used as the zero source below
move.b d1,SCSP_SISD(a5,d4.w) ; <<< [SI],[SD] = 0 >>>
move.b HOST_P11(a0),SCSP_TLVL(a5,d4.w) ; P11 : total level
move.w d1,SCSP_MDLSL(a5,d4.w) ; <<< [MSL],[MDXYSL] = 0 >>>
move.w d1,SCSP_RELFO(a5,d4.w) ; <<< [LFO] >>>
move.w HOST_P5(a0),d1 ; = 1,2,...,FFFF,0000
subq.w #1,d1 ; = 0,1,...,FFFE,FFFF
move.w d1,SCSP_PCM_LEA(a5,d4.w) ; <<< [LEA] >>>
move.b HOST_P2(a0),d0 ; P2 : [DISDL],[DIPAN]
btst.b #7,SND_OUT_ST(a6) ; MONO/STEREO status
beq.s drv_85_6ST ; jump if STEREO mode
andi.b #$F0,d0 ; set Center
drv_85_6ST:
move.b d0,SCSP_DISDLPN(a5,d4.w) ; <<< [DISDL],[DIPAN] >>>
move.l #$001F001F,SCSP_D2R1R(a5,d4.w) ; <<< AR,RR = 1FH >>>
move.b HOST_P9(a0),SCSP_ISEL+1(a5,d4.w) ; <<< [ISEL],[IMXL] >>>
move.w HOST_P7(a0),SCSP_OCTFNS(a5,d4.w) ; <<< [OCT],[FNS] >>>
move.b #$18,SCSP_KXKB(a5,d4.w) ; <<< key on >>>
add.w d4,d4 ; back to the slot work offset (*2); not restored, routine returns next
move.b d0,sl_DISDLPAN(a2,d4.w) ; keep a copy of [DISDL],[DIPAN] in slot work
rts
I'd like to have the copy run at Vblank, otherwise it will become desynchronized when interrupted.
Oh well, I'm going to burn a CD real quick.