usbdrv/usbdrvasm20.inc

changeset 0
9e9b2c78bd31
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usbdrv/usbdrvasm20.inc	Thu Feb 16 14:40:23 2017 +0100
@@ -0,0 +1,359 @@
+/* Name: usbdrvasm20.inc
+ * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers
+ * Author: Jeroen Benschop
+ * Based on usbdrvasm16.inc from Christian Starkjohann
+ * Creation Date: 2008-03-05
+ * Tabsize: 4
+ * Copyright: (c) 2008 by Jeroen Benschop and OBJECTIVE DEVELOPMENT Software GmbH
+ * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt)
+ */
+
+/* Do not link this file! Link usbdrvasm.S instead, which includes the
+ * appropriate implementation!
+ */
+
+/*
+General Description:
+This file is the 20 MHz version of the asssembler part of the USB driver. It
+requires a 20 MHz crystal (not a ceramic resonator and not a calibrated RC
+oscillator).
+
+See usbdrv.h for a description of the entire driver.
+
+Since almost all of this code is timing critical, don't change unless you
+really know what you are doing! Many parts require not only a maximum number
+of CPU cycles, but even an exact number of cycles!
+*/
+
+#define leap2   x3
+#ifdef __IAR_SYSTEMS_ASM__
+#define nextInst    $+2
+#else
+#define nextInst    .+0
+#endif
+
+;max stack usage: [ret(2), YL, SREG, YH, bitcnt, shift, x1, x2, x3, x4, cnt] = 12 bytes
+;nominal frequency: 20 MHz -> 13.333333 cycles per bit, 106.666667 cycles per byte
+; Numbers in brackets are clocks counted from center of last sync bit
+; when instruction starts
+;register use in receive loop:
+; shift assembles the byte currently being received
+; x1 holds the D+ and D- line state
+; x2 holds the previous line state
+; x4 (leap)  is used to add a leap cycle once every three bytes received
+; X3 (leap2) is used to add a leap cycle once every three stuff bits received
+; bitcnt is used to determine when a stuff bit is due
+; cnt holds the number of bytes left in the receive buffer
+
+USB_INTR_VECTOR:
+;order of registers pushed: YL, SREG YH, [sofError], bitcnt, shift, x1, x2, x3, x4, cnt
+    push    YL                  ;[-28] push only what is necessary to sync with edge ASAP
+    in      YL, SREG            ;[-26]
+    push    YL                  ;[-25]
+    push    YH                  ;[-23]
+;----------------------------------------------------------------------------
+; Synchronize with sync pattern:
+;----------------------------------------------------------------------------
+;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
+;sync up with J to K edge during sync pattern -- use fastest possible loops
+;The first part waits at most 1 bit long since we must be in sync pattern.
+;YL is guarenteed to be < 0x80 because I flag is clear. When we jump to
+;waitForJ, ensure that this prerequisite is met.
+waitForJ:
+    inc     YL
+    sbis    USBIN, USBMINUS
+    brne    waitForJ        ; just make sure we have ANY timeout
+waitForK:
+;The following code results in a sampling window of < 1/4 bit which meets the spec.
+    sbis    USBIN, USBMINUS     ;[-19]
+    rjmp    foundK              ;[-18]
+    sbis    USBIN, USBMINUS
+    rjmp    foundK
+    sbis    USBIN, USBMINUS
+    rjmp    foundK
+    sbis    USBIN, USBMINUS
+    rjmp    foundK
+    sbis    USBIN, USBMINUS
+    rjmp    foundK
+    sbis    USBIN, USBMINUS
+    rjmp    foundK
+    sbis    USBIN, USBMINUS
+    rjmp    foundK
+    sbis    USBIN, USBMINUS
+    rjmp    foundK
+    sbis    USBIN, USBMINUS
+    rjmp    foundK
+#if USB_COUNT_SOF
+    lds     YL, usbSofCount
+    inc     YL
+    sts     usbSofCount, YL
+#endif  /* USB_COUNT_SOF */
+#ifdef USB_SOF_HOOK
+    USB_SOF_HOOK
+#endif
+    rjmp    sofError
+foundK:                         ;[-16]
+;{3, 5} after falling D- edge, average delay: 4 cycles
+;bit0 should be at 34 for center sampling. Currently at 4 so 30 cylces till bit 0 sample
+;use 1 bit time for setup purposes, then sample again. Numbers in brackets
+;are cycles from center of first sync (double K) bit after the instruction
+    push    bitcnt              ;[-16]
+;   [---]                       ;[-15]
+    lds     YL, usbInputBufOffset;[-14]
+;   [---]                       ;[-13]
+    clr     YH                  ;[-12]
+    subi    YL, lo8(-(usbRxBuf));[-11] [rx loop init]
+    sbci    YH, hi8(-(usbRxBuf));[-10] [rx loop init]
+    push    shift               ;[-9]
+;   [---]                       ;[-8]
+    ldi     shift,0x40          ;[-7] set msb to "1" so processing bit7 can be detected
+    nop2                        ;[-6]
+;   [---]                       ;[-5]
+    ldi     bitcnt, 5           ;[-4] [rx loop init]
+    sbis    USBIN, USBMINUS     ;[-3] we want two bits K (sample 3 cycles too early)
+    rjmp    haveTwoBitsK        ;[-2]
+    pop     shift               ;[-1] undo the push from before
+    pop     bitcnt              ;[1] 
+    rjmp    waitForK            ;[3] this was not the end of sync, retry
+; The entire loop from waitForK until rjmp waitForK above must not exceed two
+; bit times (= 27 cycles).
+
+;----------------------------------------------------------------------------
+; push more registers and initialize values while we sample the first bits:
+;----------------------------------------------------------------------------
+haveTwoBitsK:
+    push    x1                  ;[0]
+    push    x2                  ;[2]
+    push    x3                  ;[4] (leap2)
+    ldi     leap2, 0x55         ;[6] add leap cycle on 2nd,5th,8th,... stuff bit
+    push    x4                  ;[7] == leap
+    ldi     leap, 0x55          ;[9] skip leap cycle on 2nd,5th,8th,... byte received
+    push    cnt                 ;[10]
+    ldi     cnt, USB_BUFSIZE    ;[12] [rx loop init]
+    ldi     x2, 1<<USBPLUS      ;[13] current line state is K state. D+=="1", D-=="0"
+bit0:       
+    in      x1, USBIN           ;[0] sample line state
+    andi    x1, USBMASK         ;[1] filter only D+ and D- bits
+    rjmp    handleBit           ;[2] make bit0 14 cycles long
+
+;----------------------------------------------------------------------------
+; Process bit7. However, bit 6 still may need unstuffing.
+;----------------------------------------------------------------------------
+
+b6checkUnstuff:
+    dec     bitcnt              ;[9]
+    breq    unstuff6            ;[10]
+bit7:
+    subi    cnt, 1              ;[11] cannot use dec becaus it does not affect the carry flag
+    brcs    overflow            ;[12] Too many bytes received. Ignore packet
+    in      x1, USBIN           ;[0] sample line state
+    andi    x1, USBMASK         ;[1] filter only D+ and D- bits
+    cpse    x1, x2              ;[2] when previous line state equals current line state, handle "1"
+    rjmp    b7handle0           ;[3] when line state differs, handle "0"
+    sec                         ;[4]
+    ror     shift               ;[5] shift "1" into the data
+    st      y+, shift           ;[6] store the data into the buffer
+    ldi     shift, 0x40         ;[7] reset data for receiving the next byte
+    subi    leap, 0x55          ;[9] trick to introduce a leap cycle every 3 bytes
+    brcc    nextInst            ;[10 or 11] it will fail after 85 bytes. However low speed can only receive 11
+    dec     bitcnt              ;[11 or 12]
+    brne    bit0                ;[12 or 13]
+    ldi     x1, 1               ;[13 or 14] unstuffing bit 7
+    in      bitcnt, USBIN       ;[0] sample stuff bit
+    rjmp    unstuff             ;[1]
+
+b7handle0:
+    mov     x2,x1               ;[5] Set x2 to current line state
+    ldi     bitcnt, 6           ;[6]
+    lsr     shift               ;[7] shift "0" into the data
+    st      y+, shift           ;[8] store data into the buffer
+    ldi     shift, 0x40         ;[10] reset data for receiving the next byte
+    subi    leap, 0x55          ;[11] trick to introduce a leap cycle every 3 bytes
+    brcs    bit0                ;[12] it will fail after 85 bytes. However low speed can only receive 11
+    rjmp    bit0                ;[13]
+
+
+;----------------------------------------------------------------------------
+; Handle unstuff
+; x1==0xFF indicate unstuffing bit6
+;----------------------------------------------------------------------------
+
+unstuff6:
+    ldi     x1,0xFF             ;[12] indicate unstuffing bit 6
+    in      bitcnt, USBIN       ;[0]  sample stuff bit
+    nop                         ;[1]  fix timing
+unstuff:                        ;b0-5  b6   b7
+    mov     x2,bitcnt           ;[3]  [2]  [3]  Set x2 to match line state
+    subi    leap2, 0x55         ;[4]  [3]  [4]  delay loop
+    brcs    nextInst            ;[5]  [4]  [5]  add one cycle every three stuff bits
+    sbci    leap2,0             ;[6]  [5]  [6]
+    ldi     bitcnt,6            ;[7]  [6]  [7]  reset bit stuff counter
+    andi    x2, USBMASK         ;[8]  [7]  [8] only keep D+ and D-
+    cpi     x1,0                ;[9]  [8]  [9]
+    brmi    bit7                ;[10] [9]  [10] finished unstuffing bit6 When x1<0
+    breq    bitloop             ;[11] ---  [11] finished unstuffing bit0-5 when x1=0
+    nop                         ;---  ---  [12]
+    in      x1, USBIN           ;---  ---  [0] sample line state for bit0
+    andi    x1, USBMASK         ;---  ---  [1] filter only D+ and D- bits
+    rjmp    handleBit           ;---  ---  [2] make bit0 14 cycles long
+
+;----------------------------------------------------------------------------
+; Receiver loop (numbers in brackets are cycles within byte after instr)
+;----------------------------------------------------------------------------
+bitloop:
+    in      x1, USBIN           ;[0] sample line state
+    andi    x1, USBMASK         ;[1] filter only D+ and D- bits
+    breq    se0                 ;[2] both lines are low so handle se0
+handleBit:
+    cpse    x1, x2              ;[3] when previous line state equals current line state, handle "1"
+    rjmp    handle0             ;[4] when line state differs, handle "0"
+    sec                         ;[5]
+    ror     shift               ;[6] shift "1" into the data
+    brcs    b6checkUnstuff      ;[7] When after shift C is set, next bit is bit7
+    nop2                        ;[8]
+    dec     bitcnt              ;[10]
+    brne    bitloop             ;[11]
+    ldi     x1,0                ;[12] indicate unstuff for bit other than bit6 or bit7
+    in      bitcnt, USBIN       ;[0] sample stuff bit
+    rjmp    unstuff             ;[1]
+
+handle0:
+    mov     x2, x1              ;[6] Set x2 to current line state
+    ldi     bitcnt, 6           ;[7] reset unstuff counter. 
+    lsr     shift               ;[8] shift "0" into the data
+    brcs    bit7                ;[9] When after shift C is set, next bit is bit7
+    nop                         ;[10]
+    rjmp    bitloop             ;[11] 
+    
+;----------------------------------------------------------------------------
+; End of receive loop. Now start handling EOP
+;----------------------------------------------------------------------------
+
+macro POP_STANDARD ; 14 cycles
+    pop     cnt
+    pop     x4
+    pop     x3
+    pop     x2
+    pop     x1
+    pop     shift
+    pop     bitcnt
+    endm
+macro POP_RETI     ; 7 cycles
+    pop     YH
+    pop     YL
+    out     SREG, YL
+    pop     YL
+    endm
+
+
+
+#include "asmcommon.inc"
+
+; USB spec says:
+; idle = J
+; J = (D+ = 0), (D- = 1)
+; K = (D+ = 1), (D- = 0)
+; Spec allows 7.5 bit times from EOP to SOP for replies
+; 7.5 bit times is 100 cycles. This implementation arrives a bit later at se0
+; then specified in the include file but there is plenty of time
+
+bitstuffN:
+    eor     x1, x4          ;[8]
+    ldi     x2, 0           ;[9]
+    nop2                    ;[10]
+    out     USBOUT, x1      ;[12] <-- out
+    rjmp    didStuffN       ;[0]
+    
+bitstuff7:
+    eor     x1, x4          ;[6]
+    ldi     x2, 0           ;[7] Carry is zero due to brcc
+    rol     shift           ;[8] compensate for ror shift at branch destination
+    nop2                    ;[9]
+    rjmp    didStuff7       ;[11]
+
+sendNakAndReti:
+    ldi     x3, USBPID_NAK  ;[-18]
+    rjmp    sendX3AndReti   ;[-17]
+sendAckAndReti:
+    ldi     cnt, USBPID_ACK ;[-17]
+sendCntAndReti:
+    mov     x3, cnt         ;[-16]
+sendX3AndReti:
+    ldi     YL, 20          ;[-15] x3==r20 address is 20
+    ldi     YH, 0           ;[-14]
+    ldi     cnt, 2          ;[-13]
+;   rjmp    usbSendAndReti      fallthrough
+
+;usbSend:
+;pointer to data in 'Y'
+;number of bytes in 'cnt' -- including sync byte [range 2 ... 12]
+;uses: x1...x4, btcnt, shift, cnt, Y
+;Numbers in brackets are time since first bit of sync pattern is sent
+;We don't match the transfer rate exactly (don't insert leap cycles every third
+;byte) because the spec demands only 1.5% precision anyway.
+usbSendAndReti:             ; 12 cycles until SOP
+    in      x2, USBDDR      ;[-12]
+    ori     x2, USBMASK     ;[-11]
+    sbi     USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
+    in      x1, USBOUT      ;[-8] port mirror for tx loop
+    out     USBDDR, x2      ;[-7] <- acquire bus
+; need not init x2 (bitstuff history) because sync starts with 0
+    ldi     x4, USBMASK     ;[-6] exor mask
+    ldi     shift, 0x80     ;[-5] sync byte is first byte sent
+txByteLoop:
+    ldi     bitcnt, 0x49    ;[-4]        [10] binary 01001001
+txBitLoop:
+    sbrs    shift, 0        ;[-3] [10]   [11]
+    eor     x1, x4          ;[-2] [11]   [12]
+    out     USBOUT, x1      ;[-1] [12]   [13]   <-- out N
+    ror     shift           ;[0]  [13]   [14]
+    ror     x2              ;[1]
+didStuffN:
+    nop2                    ;[2]
+    nop                     ;[4]
+    cpi     x2, 0xfc        ;[5]
+    brcc    bitstuffN       ;[6]
+    lsr     bitcnt          ;[7]
+    brcc    txBitLoop       ;[8]
+    brne    txBitLoop       ;[9]
+
+    sbrs    shift, 0        ;[10]
+    eor     x1, x4          ;[11]
+didStuff7:
+    out     USBOUT, x1      ;[-1] [13] <-- out 7
+    ror     shift           ;[0] [14]
+    ror     x2              ;[1]
+    nop                     ;[2]
+    cpi     x2, 0xfc        ;[3]
+    brcc    bitstuff7       ;[4]
+    ld      shift, y+       ;[5]
+    dec     cnt             ;[7]
+    brne    txByteLoop      ;[8]
+;make SE0:
+    cbr     x1, USBMASK     ;[9] prepare SE0 [spec says EOP may be 25 to 30 cycles]
+    lds     x2, usbNewDeviceAddr;[10]
+    lsl     x2              ;[12] we compare with left shifted address
+    out     USBOUT, x1      ;[13] <-- out SE0 -- from now 2 bits = 22 cycles until bus idle
+    subi    YL, 20 + 2      ;[0] Only assign address on data packets, not ACK/NAK in x3
+    sbci    YH, 0           ;[1]
+;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
+;set address only after data packet was sent, not after handshake
+    breq    skipAddrAssign  ;[2]
+    sts     usbDeviceAddr, x2; if not skipped: SE0 is one cycle longer
+skipAddrAssign:
+;end of usbDeviceAddress transfer
+    ldi     x2, 1<<USB_INTR_PENDING_BIT;[4] int0 occurred during TX -- clear pending flag
+    USB_STORE_PENDING(x2)   ;[5]
+    ori     x1, USBIDLE     ;[6]
+    in      x2, USBDDR      ;[7]
+    cbr     x2, USBMASK     ;[8] set both pins to input
+    mov     x3, x1          ;[9]
+    cbr     x3, USBMASK     ;[10] configure no pullup on both pins
+    ldi     x4, 5           ;[11]
+se0Delay:
+    dec     x4              ;[12] [15] [18] [21] [24]
+    brne    se0Delay        ;[13] [16] [19] [22] [25]
+    out     USBOUT, x1      ;[26] <-- out J (idle) -- end of SE0 (EOP signal)
+    out     USBDDR, x2      ;[27] <-- release bus now
+    out     USBOUT, x3      ;[28] <-- ensure no pull-up resistors are active
+    rjmp    doReturn

mercurial