Increase MSP432™ SPI Performance – Part 2

Ok, here it is, the Fill function written in assembler.
Due some caching of hardware register addresses in the Core registers, I was able to reduce the following code from 8+ steps to 4.

UCB2TXBUF = data;
while (UCB2STATW & UCBUSY);

The following assembler source is the first full standalone assembler code I wrote. Don’t be mad at me!

ILICTL_POUT   .word 0x40004C42 ; P5OUT
ILIUCI_TXBUF  .word 0x4000280E ; UCB2TXBUF
ILIUCI_STATW  .word 0x40002808 ; UCB2STATW
ILI9341_RAMWR .set 0x2c
ILICTL_DCX    .set 0x0004
UCBUSY        .set 0x0001
 
;############################################################################################
; extern void ILI9341_FillASM(uint16_t color, uint32_t count32);
    .global ILI9341_FillASM
ILI9341_FillASM: .asmfunc
 
Color   .set r0 ; r0 = Color ; Param 0
Count   .set r1 ; r1 = Count ; Param 1
TXBUF   .set r2 ; r2 = ILIUCI_TXBUF
STATW   .set r3 ; r3 = ILIUCI_STATW
POUT    .set r4 ; r4 = ILICTL_POUT
ColorH  .set r5 ; r5 = Color >> 8
Slow    .set r6 ; r6 = Count & 0x0007
Buffer  .set r7
 
    push {r4-r7}
 
; cache hardware register addresses
    ldr TXBUF, ILIUCI_TXBUF
    ldr STATW, ILIUCI_STATW
    ldr POUT, ILICTL_POUT
 
; ILI_COMMAND
    ldrb Buffer, [POUT]
    bic Buffer, Buffer, #ILICTL_DCX
    strb Buffer, [POUT]
 
; transmit ramwr byte
    mov Buffer, #ILI9341_RAMWR
    strb Buffer, [TXBUF]
 
RAMWR_Busy_L: ; wait until transmit
        ldrb Buffer, [STATW]
        tst Buffer, #UCBUSY
        bne RAMWR_Busy_L
 
; ILI_DATA
    ldrb Buffer, [POUT]
    orr Buffer, Buffer, #ILICTL_DCX
    strb Buffer, [POUT]
 
    lsr ColorH, Color, #8 ; Color >> 8
    ands Slow, Count, #0x0007 ; Test if lower bits of count is set
    beq Fast
 
Slow_L: ; Slow data loop
        strb ColorH, [TXBUF] ; Color H
Slow_Busy_L0: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Slow_Busy_L0
        strb Color, [TXBUF] ; Color L
Slow_Busy_L1: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Slow_Busy_L1
        subs Slow, Slow, #1
        bne Slow_L ; continue while slow data
 
Fast: ; Fast data
    lsrs Count, Count, #3 ; count >> 3
    beq End ; if count = 0, goto end
 
Fast_L: ; fast data loop
        ; ----------------- Data 0 -----------------
        strb ColorH, [TXBUF] ; Color H
Fast_Busy_L0: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L0
        strb Color, [TXBUF] ; Color L
Fast_Busy_L1: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L1
 
        ; ----------------- Data 1 -----------------
        strb ColorH, [TXBUF] ; Color H
Fast_Busy_L2: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L2
        strb Color, [TXBUF] ; Color L
Fast_Busy_L3: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L3
 
        ; ----------------- Data 2 -----------------
        strb ColorH, [TXBUF] ; Color H
Fast_Busy_L4: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L4
        strb Color, [TXBUF] ; Color L
Fast_Busy_L5: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L5
 
        ; ----------------- Data 3 -----------------
        strb ColorH, [TXBUF] ; Color H
Fast_Busy_L6: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L6
        strb Color, [TXBUF] ; Color L
Fast_Busy_L7: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L7
 
        ; ----------------- Data 4 -----------------
        strb ColorH, [TXBUF] ; Color H
Fast_Busy_L8: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L8
        strb Color, [TXBUF] ; Color L
Fast_Busy_L9: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L9
 
        ; ----------------- Data 5 -----------------
        strb ColorH, [TXBUF] ; Color H
Fast_Busy_L10: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L10
        strb Color, [TXBUF] ; Color L
Fast_Busy_L11: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L11
 
        ; ----------------- Data 6 -----------------
        strb ColorH, [TXBUF] ; Color H
Fast_Busy_L12: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L12
        strb Color, [TXBUF] ; Color L
Fast_Busy_L13: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L13
 
        ; ----------------- Data 7 -----------------
        strb ColorH, [TXBUF] ; Color H
Fast_Busy_L14: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L14
        strb Color, [TXBUF] ; Color L
Fast_Busy_L15: ; wait until transmit
            ldrb Buffer, [STATW]
            tst Buffer, #UCBUSY
            bne Fast_Busy_L15
 
        subs Count, Count, #1
        bne Fast_L ; continue while fast data
 
End:
    pop {r4-r7}
    bx lr
    .endasmfunc