|
1 /* Name: usbdrvasm18.inc |
|
2 * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers |
|
3 * Author: Lukas Schrittwieser (based on 20 MHz usbdrvasm20.inc by Jeroen Benschop) |
|
4 * Creation Date: 2009-01-20 |
|
5 * Tabsize: 4 |
|
6 * Copyright: (c) 2008 by Lukas Schrittwieser and OBJECTIVE DEVELOPMENT Software GmbH |
|
7 * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt) |
|
8 */ |
|
9 |
|
10 /* Do not link this file! Link usbdrvasm.S instead, which includes the |
|
11 * appropriate implementation! |
|
12 */ |
|
13 |
|
14 /* |
|
15 General Description: |
|
16 This file is the 18 MHz version of the assembler part of the USB driver. It |
|
17 requires a 18 MHz crystal (not a ceramic resonator and not a calibrated RC |
|
18 oscillator). |
|
19 |
|
20 See usbdrv.h for a description of the entire driver. |
|
21 |
|
22 Since almost all of this code is timing critical, don't change unless you |
|
23 really know what you are doing! Many parts require not only a maximum number |
|
24 of CPU cycles, but even an exact number of cycles! |
|
25 */ |
|
26 |
|
27 |
|
28 ;max stack usage: [ret(2), YL, SREG, YH, [sofError], bitcnt(x5), shift, x1, x2, x3, x4, cnt, ZL, ZH] = 14 bytes |
|
29 ;nominal frequency: 18 MHz -> 12 cycles per bit |
|
30 ; Numbers in brackets are clocks counted from center of last sync bit |
|
31 ; when instruction starts |
|
32 ;register use in receive loop to receive the data bytes: |
|
33 ; shift assembles the byte currently being received |
|
34 ; x1 holds the D+ and D- line state |
|
35 ; x2 holds the previous line state |
|
36 ; cnt holds the number of bytes left in the receive buffer |
|
37 ; x3 holds the higher crc byte (see algorithm below) |
|
38 ; x4 is used as temporary register for the crc algorithm |
|
39 ; x5 is used for unstuffing: when unstuffing, the last received bit is inverted in shift (to prevent further |
|
40 ; unstuffing calls). At the same time the corresponding bit in x5 is cleared to mark the bit as being inverted |
|
41 ; zl lower crc value and crc table index |
|
42 ; zh used for crc table accesses |
|
43 |
|
44 ;-------------------------------------------------------------------------------------------------------------- |
|
45 ; CRC mods: |
|
46 ; table driven crc checker, Z points to table in prog space |
|
47 ; ZL is the lower crc byte, x3 is the higher crc byte |
|
48 ; x4 is used as temp register to store different results |
|
49 ; the initialization of the crc register is not 0xFFFF but 0xFE54. This is because during the receipt of the |
|
50 ; first data byte a virtual zero data byte is added to the crc register, this results in the correct initial |
|
51 ; value of 0xFFFF at beginning of the second data byte before the first data byte is added to the crc. |
|
52 ; The magic number 0xFE54 results from the crc table: At tabH[0x54] = 0xFF = crcH (required) and |
|
53 ; tabL[0x54] = 0x01 -> crcL = 0x01 xor 0xFE = 0xFF |
|
54 ; bitcnt is renamed to x5 and is used for unstuffing purposes, the unstuffing works like in the 12MHz version |
|
55 ;-------------------------------------------------------------------------------------------------------------- |
|
56 ; CRC algorithm: |
|
57 ; The crc register is formed by x3 (higher byte) and ZL (lower byte). The algorithm uses a 'reversed' form |
|
58 ; i.e. that it takes the least significant bit first and shifts to the right. So in fact the highest order |
|
59 ; bit seen from the polynomial division point of view is the lsb of ZL. (If this sounds strange to you I |
|
60 ; propose a research on CRC :-) ) |
|
61 ; Each data byte received is xored to ZL, the lower crc byte. This byte now builds the crc |
|
62 ; table index. Next the new high byte is loaded from the table and stored in x4 until we have space in x3 |
|
63 ; (its destination). |
|
64 ; Afterwards the lower table is loaded from the table and stored in ZL (the old index is overwritten as |
|
65 ; we don't need it anymore. In fact this is a right shift by 8 bits.) Now the old crc high value is xored |
|
66 ; to ZL, this is the second shift of the old crc value. Now x4 (the temp reg) is moved to x3 and the crc |
|
67 ; calculation is done. |
|
68 ; Prior to the first byte the two CRC registers have to be initialized to 0xFFFF (as defined in usb spec) |
|
69 ; however the crc engine also runs during the receipt of the first byte, therefore x3 and zl are initialized |
|
70 ; to a magic number which results in a crc value of 0xFFFF after the first complete byte. |
|
71 ; |
|
72 ; This algorithm is split into the extra cycles of the different bits: |
|
73 ; bit7: XOR the received byte to ZL |
|
74 ; bit5: load the new high byte to x4 |
|
75 ; bit6: load the lower xor byte from the table, xor zl and x3, store result in zl (=the new crc low value) |
|
76 ; move x4 (the new high byte) to x3, the crc value is ready |
|
77 ; |
|
78 |
|
79 |
|
; POP_STANDARD: restores ZH, ZL, cnt, x5, x3, x2, x1, shift and x4 from the stack.
; This is exactly the reverse of the order in which USB_INTR_VECTOR pushes them after YH
; (x4, shift, x1, x2, x3, x5, cnt, ZL, ZH). Nine pops account for the 18 cycles noted below.
80 macro POP_STANDARD ; 18 cycles |
|
81 pop ZH |
|
82 pop ZL |
|
83 pop cnt |
|
84 pop x5 |
|
85 pop x3 |
|
86 pop x2 |
|
87 pop x1 |
|
88 pop shift |
|
89 pop x4 |
|
90 endm |
|
; POP_RETI: undoes the three pushes made on interrupt entry (YL, SREG via YL, YH):
; restores YH, then SREG (popped into YL and written with out), then YL itself.
; The macro does not contain the reti instruction; the user of the macro has to supply it.
91 macro POP_RETI ; 7 cycles |
|
92 pop YH |
|
93 pop YL |
|
94 out SREG, YL |
|
95 pop YL |
|
96 endm |
|
97 |
|
; CRC_CLEANUP_AND_CHECK: finishes the table driven crc for the last received byte and
; compares the result in x2:ZL with the expected residual 0xb001.
; In:      ZL = crc low byte already xored with the last data byte (table index), x3 = crc high byte
; Out:     falls through if the crc matches, otherwise branches to ignorePacket
;          (label defined outside this file section)
; Clobbers ZH, ZL, x2 and the flags.
; The [+n] marks count cycles from the start of the macro, assuming no branch is taken.
98 macro CRC_CLEANUP_AND_CHECK |
|
99 ; the last byte has already been xored with the lower crc byte, we have to do the table lookup and xor |
|
100 ; x3 is the higher crc byte, zl the lower one |
|
101 ldi ZH, hi8(usbCrcTableHigh);[+1] get the new high byte from the table |
|
102 lpm x2, Z ;[+2][+3][+4] |
|
103 ldi ZH, hi8(usbCrcTableLow);[+5] get the new low xor byte from the table |
|
104 lpm ZL, Z ;[+6][+7][+8] |
|
105 eor ZL, x3 ;[+9] xor the old high byte with the value from the table, x2:ZL now holds the crc value |
|
106 cpi ZL, 0x01 ;[+10] if the crc is ok we have a fixed remainder value of 0xb001 in x2:ZL (see usb spec) |
|
107 brne ignorePacket ;[+11] detected a crc fault -> packet is ignored and retransmitted by the host |
|
108 cpi x2, 0xb0 ;[+12] |
|
109 brne ignorePacket ;[+13] detected a crc fault -> packet is ignored and retransmitted by the host |
|
110 endm |
|
111 |
|
112 |
|
; USB_INTR_VECTOR: interrupt entry point of the receiver.
; Saves YL, SREG and YH, then synchronizes to the sync pattern on D-:
;   waitForJ - spins until D- reads high (J) or until YL wraps around to zero (timeout)
;   waitForK - samples D- nine times in a row looking for the J -> K edge
;   foundK   - uses one bit time for setup (push x4/shift, Y = receive buffer address) and
;              samples D- again; a second K means end of sync, otherwise the pushes are undone
;              and the search for K restarts
; If no K is found, control goes to sofError (label defined outside this file section).
113 USB_INTR_VECTOR: |
|
114 ;order of registers pushed: YL, SREG, YH, [sofError], x4, shift, x1, x2, x3, x5, cnt, ZL, ZH |
|
115 push YL ;[-28] push only what is necessary to sync with edge ASAP |
|
116 in YL, SREG ;[-26] |
|
117 push YL ;[-25] |
|
118 push YH ;[-23] |
|
119 ;---------------------------------------------------------------------------- |
|
120 ; Synchronize with sync pattern: |
|
121 ;---------------------------------------------------------------------------- |
|
122 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K] |
|
123 ;sync up with J to K edge during sync pattern -- use fastest possible loops |
|
124 ;The first part waits at most 1 bit long since we must be in sync pattern. |
|
125 ;YL is guaranteed to be < 0x80 because I flag is clear. When we jump to |
|
126 ;waitForJ, ensure that this prerequisite is met. |
|
127 waitForJ: |
|
; YL (still holding the saved SREG value) doubles as the timeout counter: inc sets Z when it wraps to 0
128 inc YL |
|
; D- high (J): skip the brne and fall through to waitForK
129 sbis USBIN, USBMINUS |
|
130 brne waitForJ ; just make sure we have ANY timeout |
|
131 waitForK: |
|
132 ;The following code results in a sampling window of < 1/4 bit which meets the spec. |
|
133 sbis USBIN, USBMINUS ;[-17] |
|
134 rjmp foundK ;[-16] |
|
135 sbis USBIN, USBMINUS |
|
136 rjmp foundK |
|
137 sbis USBIN, USBMINUS |
|
138 rjmp foundK |
|
139 sbis USBIN, USBMINUS |
|
140 rjmp foundK |
|
141 sbis USBIN, USBMINUS |
|
142 rjmp foundK |
|
143 sbis USBIN, USBMINUS |
|
144 rjmp foundK |
|
145 sbis USBIN, USBMINUS |
|
146 rjmp foundK |
|
147 sbis USBIN, USBMINUS |
|
148 rjmp foundK |
|
149 sbis USBIN, USBMINUS |
|
150 rjmp foundK |
|
; no K seen in any of the nine samples: optionally count/hook, then leave via sofError
151 #if USB_COUNT_SOF |
|
152 lds YL, usbSofCount |
|
153 inc YL |
|
154 sts usbSofCount, YL |
|
155 #endif /* USB_COUNT_SOF */ |
|
156 #ifdef USB_SOF_HOOK |
|
157 USB_SOF_HOOK |
|
158 #endif |
|
159 rjmp sofError |
|
160 foundK: ;[-15] |
|
161 ;{3, 5} after falling D- edge, average delay: 4 cycles |
|
162 ;bit0 should be at 30 (2.5 bits) for center sampling. Currently at 4 so 26 cycles till bit 0 sample |
|
163 ;use 1 bit time for setup purposes, then sample again. Numbers in brackets |
|
164 ;are cycles from center of first sync (double K) bit after the instruction |
|
165 push x4 ;[-14] |
|
166 ; [---] ;[-13] |
|
167 lds YL, usbInputBufOffset;[-12] used to toggle the two usb receive buffers |
|
168 ; [---] ;[-11] |
|
169 clr YH ;[-10] |
|
; subtracting the negated address adds it: Y = usbRxBuf + usbInputBufOffset
170 subi YL, lo8(-(usbRxBuf));[-9] [rx loop init] |
|
171 sbci YH, hi8(-(usbRxBuf));[-8] [rx loop init] |
|
172 push shift ;[-7] |
|
173 ; [---] ;[-6] |
|
174 ldi shift, 0x80 ;[-5] the last bit is the end of byte marker for the pid receiver loop |
|
175 clc ;[-4] the carry has to be clear for receipt of pid bit 0 |
|
176 sbis USBIN, USBMINUS ;[-3] we want two bits K (sample 3 cycles too early) |
|
177 rjmp haveTwoBitsK ;[-2] |
|
178 pop shift ;[-1] undo the push from before |
|
179 pop x4 ;[1] |
|
180 rjmp waitForK ;[3] this was not the end of sync, retry |
|
181 ; The entire loop from waitForK until rjmp waitForK above must not exceed two |
|
182 ; bit times (= 24 cycles). |
|
183 |
|
184 ;---------------------------------------------------------------------------- |
|
185 ; push more registers and initialize values while we sample the first bits: |
|
186 ;---------------------------------------------------------------------------- |
|
; haveTwoBitsK: end of sync detected. Saves the remaining working registers while the pid byte
; is being received and initializes the receive loop:
;   x2    = previous line state (K), cnt = USB_BUFSIZE
;   x3:ZL = 0xFE54, the crc start value explained in the file header
; The pid bits are shifted into 'shift' (preset to 0x80 in foundK) until that marker bit drops
; out into the carry. On exit x4 holds the bit-inverted content of shift, which rxDataStart
; stores as the first buffer byte. An SE0 during pid bits 1..7 branches to nse0.
187 haveTwoBitsK: |
|
188 push x1 ;[0] |
|
189 push x2 ;[2] |
|
190 push x3 ;[4] crc high byte |
|
191 ldi x2, 1<<USBPLUS ;[6] [rx loop init] current line state is K state. D+=="1", D-=="0" |
|
192 push x5 ;[7] |
|
193 push cnt ;[9] |
|
194 ldi cnt, USB_BUFSIZE ;[11] |
|
195 |
|
196 |
|
197 ;-------------------------------------------------------------------------------------------------------------- |
|
198 ; receives the pid byte |
|
199 ; there is no real unstuffing algorithm implemented here as a stuffing bit is impossible in the pid byte. |
|
200 ; That's because the last four bits of the byte are the inverted of the first four bits. If we detect a |
|
201 ; unstuffing condition something went wrong and abort |
|
202 ; shift has to be initialized to 0x80 |
|
203 ;-------------------------------------------------------------------------------------------------------------- |
|
204 |
|
205 ; pid bit 0 - used for even more register saving (we need the z pointer) |
|
206 in x1, USBIN ;[0] sample line state |
|
207 andi x1, USBMASK ;[1] filter only D+ and D- bits |
|
208 eor x2, x1 ;[2] generate inverted of actual bit |
|
209 sbrc x2, USBMINUS ;[3] if the bit is set we received a zero |
|
210 sec ;[4] |
|
211 ror shift ;[5] we perform no unstuffing check here as this is the first bit |
|
212 mov x2, x1 ;[6] |
|
213 push ZL ;[7] |
|
214 ;[8] |
|
215 push ZH ;[9] |
|
216 ;[10] |
|
217 ldi x3, 0xFE ;[11] x3 is the high order crc value |
|
218 |
|
219 |
|
220 bitloopPid: |
|
221 in x1, USBIN ;[0] sample line state |
|
222 andi x1, USBMASK ;[1] filter only D+ and D- bits |
|
223 breq nse0 ;[2] both lines are low so handle se0 |
|
224 eor x2, x1 ;[3] generate inverted of actual bit |
|
225 sbrc x2, USBMINUS ;[4] set the carry if we received a zero |
|
226 sec ;[5] |
|
227 ror shift ;[6] |
|
228 ldi ZL, 0x54 ;[7] ZL is the low order crc value |
|
229 ser x4 ;[8] there is no bit stuffing check here as the pid bit can't be stuffed. if so |
|
230 ; some error occurred. In this case the packet is discarded later on anyway. |
|
231 mov x2, x1 ;[9] prepare for the next cycle |
|
; ldi, ser and mov leave the carry untouched, so brcc still tests the bit shifted out by ror
232 brcc bitloopPid ;[10] while 0s drop out of shift we get the next bit |
|
233 eor x4, shift ;[11] invert all bits in shift and store result in x4 |
|
234 |
|
235 ;-------------------------------------------------------------------------------------------------------------- |
|
236 ; receives data bytes and calculates the crc |
|
237 ; the last USBIN state has to be in x2 |
|
238 ; this is only the first half, due to branch distance limitations the second half of the loop is near the end |
|
239 ; of this asm file |
|
240 ;-------------------------------------------------------------------------------------------------------------- |
|
241 |
|
; rxDataStart: first half of the data byte receive loop (bits 0..3), 12 cycles per bit.
; Each bit is derived from the change of the line state between two samples (eor of the
; previous and current USBIN value, bit USBMINUS) and copied into 'shift' with bst/bld.
; x1 and x2 alternate as "current" and "previous" sample from bit to bit.
; After every bit, shift is masked so that only the six most recently received bit positions
; remain; a zero result triggers the matching unstuffN handler.
; During bit 0 the byte completed in the previous pass (x4) is stored to the buffer at Y,
; after cnt has been decremented and checked for buffer overflow.
; SE0 seen during bits 0/1 goes to nse0; SE0 seen during bits 2/3 goes to nOverflow.
242 rxDataStart: |
|
243 in x1, USBIN ;[0] sample line state (note: a se0 check is not useful due to bit dribbling) |
|
244 ser x5 ;[1] prepare the unstuff marker register |
|
245 eor x2, x1 ;[2] generates the inverted of the actual bit |
|
246 bst x2, USBMINUS ;[3] copy the bit from x2 |
|
247 bld shift, 0 ;[4] and store it in shift |
|
248 mov x2, shift ;[5] make a copy of shift for unstuffing check |
|
; 0xF9 keeps bit 0 of this byte plus bits 3..7 still left in shift from the previous byte
249 andi x2, 0xF9 ;[6] mask the last six bits, if we got six zeros (which are six ones in fact) |
|
250 breq unstuff0 ;[7] then Z is set now and we branch to the unstuffing handler |
|
251 didunstuff0: |
|
252 subi cnt, 1 ;[8] cannot use dec because it doesn't affect the carry flag |
|
253 brcs nOverflow ;[9] Too many bytes received. Ignore packet |
|
254 st Y+, x4 ;[10] store the last received byte |
|
255 ;[11] st needs two cycles |
|
256 |
|
257 ; bit1 |
|
258 in x2, USBIN ;[0] sample line state |
|
259 andi x1, USBMASK ;[1] check for se0 during bit 0 |
|
260 breq nse0 ;[2] |
|
261 andi x2, USBMASK ;[3] check se0 during bit 1 |
|
262 breq nse0 ;[4] |
|
263 eor x1, x2 ;[5] |
|
264 bst x1, USBMINUS ;[6] |
|
265 bld shift, 1 ;[7] |
|
266 mov x1, shift ;[8] |
|
267 andi x1, 0xF3 ;[9] |
|
268 breq unstuff1 ;[10] |
|
269 didunstuff1: |
|
270 nop ;[11] |
|
271 |
|
272 ; bit2 |
|
273 in x1, USBIN ;[0] sample line state |
|
274 andi x1, USBMASK ;[1] check for se0 (as there is nothing else to do here) |
|
275 breq nOverflow ;[2] |
|
276 eor x2, x1 ;[3] generates the inverted of the actual bit |
|
277 bst x2, USBMINUS ;[4] |
|
278 bld shift, 2 ;[5] store the bit |
|
279 mov x2, shift ;[6] |
|
280 andi x2, 0xE7 ;[7] if we have six zeros here (which means six 1 in the stream) |
|
281 breq unstuff2 ;[8] the next bit is a stuffing bit |
|
282 didunstuff2: |
|
283 nop2 ;[9] |
|
284 ;[10] |
|
285 nop ;[11] |
|
286 |
|
287 ; bit3 |
|
288 in x2, USBIN ;[0] sample line state |
|
289 andi x2, USBMASK ;[1] check for se0 |
|
290 breq nOverflow ;[2] |
|
291 eor x1, x2 ;[3] |
|
292 bst x1, USBMINUS ;[4] |
|
293 bld shift, 3 ;[5] |
|
294 mov x1, shift ;[6] |
|
295 andi x1, 0xCF ;[7] |
|
296 breq unstuff3 ;[8] |
|
297 didunstuff3: |
|
298 nop ;[9] |
|
; the loop continues with bits 4..7 at rxDataBit4 (second half, placed after the transmit code)
299 rjmp rxDataBit4 ;[10] |
|
300 ;[11] |
|
301 |
|
; Branch trampolines for the first half of the receive loop: the conditional branches above
; and below land here and are forwarded with rjmp to se0 / overflow (labels defined outside
; this file section).
302 ; the avr branch instructions allow an offset of +63 instructions only, so we need this |
|
303 ; 'local copy' of se0 |
|
304 nse0: |
|
305 rjmp se0 ;[4] |
|
306 ;[5] |
|
307 ; the same as for se0 is needed for overflow and StuffErr |
|
; nOverflow and stuffErr share one trampoline: both conditions end up at overflow
308 nOverflow: |
|
309 stuffErr: |
|
310 rjmp overflow |
|
311 |
|
312 |
|
; unstuff0..unstuff3: bit-unstuffing handlers for data bits 0..3. Each handler
;   - sets bit N in shift (ori) so that the same run does not trigger another unstuff,
;   - clears bit N in x5 so the final "eor x5, shift" in bit 7 does not invert this bit again,
;   - samples the stuff bit at cycle [0] of the extra bit time and jumps to stuffErr if the
;     line state did not change (the stuff bit was a 1),
;   - stores the new sample as "previous line state" and rejoins the main loop at didunstuffN
;     at the same cycle position the breq left it.
; unstuff0 additionally checks for SE0 first, because the end of packet can arrive here.
313 unstuff0: ;[8] this is the branch delay of breq unstuffX |
|
314 andi x1, USBMASK ;[9] do an se0 check here (if the last crc byte ends with 5 one's we might end up here |
|
315 breq didunstuff0 ;[10] even though the message is complete -> jump back and store the byte |
|
316 ori shift, 0x01 ;[11] invert the last received bit to prevent further unstuffing |
|
317 in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
|
318 andi x5, 0xFE ;[1] mark this bit as inverted (will be corrected before storing shift) |
|
319 eor x1, x2 ;[2] x1 and x2 have to be different because the stuff bit is always a zero |
|
320 andi x1, USBMASK ;[3] mask the interesting bits |
|
321 breq stuffErr ;[4] if the stuff bit is a 1-bit something went wrong |
|
322 mov x1, x2 ;[5] the next bit expects the last state to be in x1 |
|
323 rjmp didunstuff0 ;[6] |
|
324 ;[7] jump delay of rjmp didunstuffX |
|
325 |
|
326 unstuff1: ;[11] this is the jump delay of breq unstuffX |
|
327 in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
|
328 ori shift, 0x02 ;[1] invert the last received bit to prevent further unstuffing |
|
329 andi x5, 0xFD ;[2] mark this bit as inverted (will be corrected before storing shift) |
|
330 eor x2, x1 ;[3] x1 and x2 have to be different because the stuff bit is always a zero |
|
331 andi x2, USBMASK ;[4] mask the interesting bits |
|
332 breq stuffErr ;[5] if the stuff bit is a 1-bit something went wrong |
|
333 mov x2, x1 ;[6] the next bit expects the last state to be in x2 |
|
334 nop2 ;[7] |
|
335 ;[8] |
|
336 rjmp didunstuff1 ;[9] |
|
337 ;[10] jump delay of rjmp didunstuffX |
|
338 |
|
339 unstuff2: ;[9] this is the jump delay of breq unstuffX |
|
340 ori shift, 0x04 ;[10] invert the last received bit to prevent further unstuffing |
|
341 andi x5, 0xFB ;[11] mark this bit as inverted (will be corrected before storing shift) |
|
342 in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
|
343 eor x1, x2 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
|
344 andi x1, USBMASK ;[2] mask the interesting bits |
|
345 breq stuffErr ;[3] if the stuff bit is a 1-bit something went wrong |
|
346 mov x1, x2 ;[4] the next bit expects the last state to be in x1 |
|
347 nop2 ;[5] |
|
348 ;[6] |
|
349 rjmp didunstuff2 ;[7] |
|
350 ;[8] jump delay of rjmp didunstuffX |
|
351 |
|
352 unstuff3: ;[9] this is the jump delay of breq unstuffX |
|
353 ori shift, 0x08 ;[10] invert the last received bit to prevent further unstuffing |
|
354 andi x5, 0xF7 ;[11] mark this bit as inverted (will be corrected before storing shift) |
|
355 in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
|
356 eor x2, x1 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
|
357 andi x2, USBMASK ;[2] mask the interesting bits |
|
358 breq stuffErr ;[3] if the stuff bit is a 1-bit something went wrong |
|
359 mov x2, x1 ;[4] the next bit expects the last state to be in x2 |
|
360 nop2 ;[5] |
|
361 ;[6] |
|
362 rjmp didunstuff3 ;[7] |
|
363 ;[8] jump delay of rjmp didunstuffX |
|
364 |
|
365 |
|
366 |
|
367 ; the include has to be here due to branch distance restrictions |
|
368 #define __USE_CRC__ |
|
369 #include "asmcommon.inc" |
|
370 |
|
371 |
|
372 |
|
373 ; USB spec says: |
|
374 ; idle = J |
|
375 ; J = (D+ = 0), (D- = 1) |
|
376 ; K = (D+ = 1), (D- = 0) |
|
377 ; Spec allows 7.5 bit times from EOP to SOP for replies |
|
378 ; 7.5 bit times is 90 cycles. ...there is plenty of time |
|
379 |
|
380 |
|
; Handshake entry points. All of them end up with the pid to send in x3, Y pointing at x3
; inside the register file (address 20) and cnt = 2, and then fall through into usbSendAndReti:
;   sendNakAndReti - sends USBPID_NAK
;   sendAckAndReti - sends USBPID_ACK
;   sendCntAndReti - sends the pid passed in cnt
;   sendX3AndReti  - sends the pid passed in x3
381 sendNakAndReti: |
|
382 ldi x3, USBPID_NAK ;[-18] |
|
383 rjmp sendX3AndReti ;[-17] |
|
384 sendAckAndReti: |
|
385 ldi cnt, USBPID_ACK ;[-17] |
|
386 sendCntAndReti: |
|
387 mov x3, cnt ;[-16] |
|
388 sendX3AndReti: |
|
389 ldi YL, 20 ;[-15] x3==r20 address is 20 |
|
390 ldi YH, 0 ;[-14] |
|
; cnt includes the sync byte (see the usbSend notes below): sync + the one pid byte in x3
391 ldi cnt, 2 ;[-13] |
|
392 ; rjmp usbSendAndReti fallthrough |
|
393 |
|
394 ;usbSend: |
|
395 ;pointer to data in 'Y' |
|
396 ;number of bytes in 'cnt' -- including sync byte [range 2 ... 12] |
|
397 ;uses: x1...x4, btcnt, shift, cnt, Y |
|
398 ;Numbers in brackets are time since first bit of sync pattern is sent |
|
399 |
|
; usbSendAndReti: transmits cnt bytes (sync byte 0x80 first, then the data at Y) and returns
; from the interrupt via doReturn (label defined outside this file section).
; In:  Y = pointer to the bytes following the sync byte, cnt = byte count including sync
; Register roles during transmit:
;   x1    = mirror of the output port, toggled with x4 (USBMASK) for every 0 bit
;   x2    = bit stuffing history; six ones in its top bits (x2 >= 0xfc) force a stuff bit
;   shift = byte currently being sent, LSb first
;   bitcnt= single 1 bit shifted right to count the first seven bits of a byte
; After the last byte an SE0 of two bit times is driven, the bus is released and, unless the
; transfer was a handshake sent from x3, usbNewDeviceAddr (shifted left) is copied to usbDeviceAddr.
400 usbSendAndReti: ; 12 cycles until SOP |
|
401 in x2, USBDDR ;[-12] |
|
402 ori x2, USBMASK ;[-11] |
|
403 sbi USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups) |
|
404 in x1, USBOUT ;[-8] port mirror for tx loop |
|
405 out USBDDR, x2 ;[-6] <- acquire bus |
|
406 ldi x2, 0 ;[-6] init x2 (bitstuff history) because sync starts with 0 |
|
407 ldi x4, USBMASK ;[-5] exor mask |
|
408 ldi shift, 0x80 ;[-4] sync byte is first byte sent |
|
409 txByteLoop: |
|
410 ldi bitcnt, 0x40 ;[-3]=[9] binary 01000000 |
|
411 txBitLoop: ; the loop sends the first 7 bits of the byte |
|
412 sbrs shift, 0 ;[-2]=[10] if we have to send a 1 don't change the line state |
|
413 eor x1, x4 ;[-1]=[11] |
|
414 out USBOUT, x1 ;[0] |
|
415 ror shift ;[1] |
|
416 ror x2 ;[2] transfers the last sent bit to the stuffing history |
|
417 didStuffN: |
|
418 nop ;[3] |
|
419 nop ;[4] |
|
420 cpi x2, 0xfc ;[5] if we sent six consecutive ones |
|
421 brcc bitstuffN ;[6] |
|
422 lsr bitcnt ;[7] |
|
423 brne txBitLoop ;[8] restart the loop while the 1 is still in the bitcount |
|
424 |
|
425 ; transmit bit 7 |
|
426 sbrs shift, 0 ;[9] |
|
427 eor x1, x4 ;[10] |
|
428 didStuff7: |
|
429 ror shift ;[11] |
|
430 out USBOUT, x1 ;[0] transfer bit 7 to the pins |
|
431 ror x2 ;[1] move the bit into the stuffing history |
|
432 cpi x2, 0xfc ;[2] |
|
433 brcc bitstuff7 ;[3] |
|
434 ld shift, y+ ;[4] get next byte to transmit |
|
435 dec cnt ;[5] decrement byte counter |
|
436 brne txByteLoop ;[7] if we have more bytes start next one |
|
437 ;[8] branch delay |
|
438 |
|
439 ;make SE0: |
|
440 cbr x1, USBMASK ;[8] prepare SE0 [spec says EOP may be 25 to 30 cycles] |
|
441 lds x2, usbNewDeviceAddr;[9] |
|
442 lsl x2 ;[11] we compare with left shifted address |
|
443 out USBOUT, x1 ;[0] <-- out SE0 -- from now 2 bits = 24 cycles until bus idle |
|
; a handshake starts with Y = 20 and cnt = 2, so the two "ld shift, y+" above leave Y = 22:
; the subtraction then yields zero and the address assignment is skipped
444 subi YL, 20 + 2 ;[1] Only assign address on data packets, not ACK/NAK in x3 |
|
445 sbci YH, 0 ;[2] |
|
446 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm: |
|
447 ;set address only after data packet was sent, not after handshake |
|
448 breq skipAddrAssign ;[3] |
|
449 sts usbDeviceAddr, x2 ; if not skipped: SE0 is one cycle longer |
|
450 skipAddrAssign: |
|
451 ;end of usbDeviceAddress transfer |
|
452 ldi x2, 1<<USB_INTR_PENDING_BIT;[5] int0 occurred during TX -- clear pending flag |
|
453 USB_STORE_PENDING(x2) ;[6] |
|
454 ori x1, USBIDLE ;[7] |
|
455 in x2, USBDDR ;[8] |
|
456 cbr x2, USBMASK ;[9] set both pins to input |
|
457 mov x3, x1 ;[10] |
|
458 cbr x3, USBMASK ;[11] configure no pullup on both pins |
|
459 ldi x4, 4 ;[12] |
|
460 se0Delay: |
|
461 dec x4 ;[13] [16] [19] [22] |
|
462 brne se0Delay ;[14] [17] [20] [23] |
|
463 out USBOUT, x1 ;[24] <-- out J (idle) -- end of SE0 (EOP signal) |
|
464 out USBDDR, x2 ;[25] <-- release bus now |
|
465 out USBOUT, x3 ;[26] <-- ensure no pull-up resistors are active |
|
466 rjmp doReturn |
|
467 |
|
; Transmit side bit stuffing, entered from usbSendAndReti when x2 >= 0xfc.
; bitstuffN (bits 0..6): toggles the line to send a 0, clears the history and re-enters the
;   bit loop at didStuffN, i.e. behind the "ror shift", so no data bit is consumed.
; bitstuff7 (bit 7): toggles the line for the stuff bit and re-enters at didStuff7, which
;   outputs it; the clc/rol pair cancels the "ror shift" executed again at didStuff7.
468 bitstuffN: |
|
469 eor x1, x4 ;[8] generate a zero |
|
470 ldi x2, 0 ;[9] reset the bit stuffing history |
|
471 nop2 ;[10] |
|
472 out USBOUT, x1 ;[0] <-- send the stuffing bit |
|
473 rjmp didStuffN ;[1] |
|
474 |
|
475 bitstuff7: |
|
476 eor x1, x4 ;[5] |
|
477 ldi x2, 0 ;[6] reset bit stuffing history |
|
478 clc ;[7] fill a zero into the shift register |
|
479 rol shift ;[8] compensate for ror shift at branch destination |
|
480 rjmp didStuff7 ;[9] |
|
481 ;[10] jump delay |
|
482 |
|
483 ;-------------------------------------------------------------------------------------------------------------- |
|
484 ; receives data bytes and calculates the crc |
|
485 ; second half of the data byte receiver loop |
|
486 ; most parts of the crc algorithm are here |
|
487 ;-------------------------------------------------------------------------------------------------------------- |
|
488 |
|
; Second half of the data byte receive loop (bits 4..7), entered from bit 3 via rjmp rxDataBit4.
; Bit sampling and the unstuff checks work as in the first half; only bit 4 checks for SE0.
; The spare cycles carry the crc update for the byte fed into ZL at the end of the previous pass:
;   bit5: x4 = usbCrcTableHigh[ZL]
;   bit6: ZL = usbCrcTableLow[ZL] xor x3 (old high byte), then x3 = x4
;   bit7: x5 xor shift yields the received byte (bits handled by an unstuff handler are not
;         inverted a second time); it is copied to x4 for storing during the next bit 0 and
;         xored into ZL as the next crc table index
; nOverflow2 is a local trampoline to overflow for the breq in bit 4.
489 nOverflow2: |
|
490 rjmp overflow |
|
491 |
|
492 rxDataBit4: |
|
493 in x1, USBIN ;[0] sample line state |
|
494 andi x1, USBMASK ;[1] check for se0 |
|
495 breq nOverflow2 ;[2] |
|
496 eor x2, x1 ;[3] |
|
497 bst x2, USBMINUS ;[4] |
|
498 bld shift, 4 ;[5] |
|
499 mov x2, shift ;[6] |
|
500 andi x2, 0x9F ;[7] |
|
501 breq unstuff4 ;[8] |
|
502 didunstuff4: |
|
503 nop2 ;[9][10] |
|
504 nop ;[11] |
|
505 |
|
506 ; bit5 |
|
507 in x2, USBIN ;[0] sample line state |
|
508 ldi ZH, hi8(usbCrcTableHigh);[1] use the table for the higher byte |
|
509 eor x1, x2 ;[2] |
|
510 bst x1, USBMINUS ;[3] |
|
511 bld shift, 5 ;[4] |
|
512 mov x1, shift ;[5] |
|
513 andi x1, 0x3F ;[6] |
|
514 breq unstuff5 ;[7] |
|
515 didunstuff5: |
|
516 lpm x4, Z ;[8] load the higher crc xor-byte and store it for later use |
|
517 ;[9] lpm needs 3 cycles |
|
518 ;[10] |
|
519 ldi ZH, hi8(usbCrcTableLow);[11] load the lower crc xor byte address |
|
520 |
|
521 ; bit6 |
|
522 in x1, USBIN ;[0] sample line state |
|
523 eor x2, x1 ;[1] |
|
524 bst x2, USBMINUS ;[2] |
|
525 bld shift, 6 ;[3] |
|
526 mov x2, shift ;[4] |
|
527 andi x2, 0x7E ;[5] |
|
528 breq unstuff6 ;[6] |
|
529 didunstuff6: |
|
530 lpm ZL, Z ;[7] load the lower xor crc byte |
|
531 ;[8] lpm needs 3 cycles |
|
532 ;[9] |
|
533 eor ZL, x3 ;[10] xor the old high crc byte with the low xor-byte |
|
534 mov x3, x4 ;[11] move the new high order crc value from temp to its destination |
|
535 |
|
536 ; bit7 |
|
537 in x2, USBIN ;[0] sample line state |
|
538 eor x1, x2 ;[1] |
|
539 bst x1, USBMINUS ;[2] |
|
540 bld shift, 7 ;[3] now shift holds the complete but inverted data byte |
|
541 mov x1, shift ;[4] |
|
542 andi x1, 0xFC ;[5] |
|
543 breq unstuff7 ;[6] |
|
544 didunstuff7: |
|
545 eor x5, shift ;[7] x5 marks all bits which have not been inverted by the unstuffing subs |
|
546 mov x4, x5 ;[8] keep a copy of the data byte it will be stored during next bit0 |
|
547 eor ZL, x4 ;[9] feed the actual byte into the crc algorithm |
|
548 rjmp rxDataStart ;[10] next byte |
|
549 ;[11] during the reception of the next byte this one will be fed into the crc algorithm |
|
550 |
|
; unstuff4..unstuff7: bit-unstuffing handlers for data bits 4..7. Each handler sets bit N in
; shift and clears bit N in x5, samples the stuff bit at cycle [0] of the extra bit time,
; jumps to stuffErr2 if the line state did not change, stores the new sample as previous line
; state and rejoins the loop at didunstuffN. The leading nop/nop2 in unstuff5..7 pad the
; handlers so that the stuff bit is sampled at cycle [0].
; stuffErr2 forwards to stuffErr for the branches of this second half of the receive loop.
551 unstuff4: ;[9] this is the jump delay of rjmp unstuffX |
|
552 ori shift, 0x10 ;[10] invert the last received bit to prevent further unstuffing |
|
553 andi x5, 0xEF ;[11] mark this bit as inverted (will be corrected before storing shift) |
|
554 in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
|
555 eor x1, x2 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
|
556 andi x1, USBMASK ;[2] mask the interesting bits |
|
557 breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong |
|
558 mov x1, x2 ;[4] the next bit expects the last state to be in x1 |
|
559 nop2 ;[5] |
|
560 ;[6] |
|
561 rjmp didunstuff4 ;[7] |
|
562 ;[8] jump delay of rjmp didunstuffX |
|
563 |
|
564 unstuff5: ;[8] this is the jump delay of rjmp unstuffX |
|
565 nop ;[9] |
|
566 ori shift, 0x20 ;[10] invert the last received bit to prevent further unstuffing |
|
567 andi x5, 0xDF ;[11] mark this bit as inverted (will be corrected before storing shift) |
|
568 in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
|
569 eor x2, x1 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
|
570 andi x2, USBMASK ;[2] mask the interesting bits |
|
571 breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong |
|
572 mov x2, x1 ;[4] the next bit expects the last state to be in x2 |
|
573 nop ;[5] |
|
574 rjmp didunstuff5 ;[6] |
|
575 ;[7] jump delay of rjmp didunstuffX |
|
576 |
|
577 unstuff6: ;[7] this is the jump delay of rjmp unstuffX |
|
578 nop2 ;[8] |
|
579 ;[9] |
|
580 ori shift, 0x40 ;[10] invert the last received bit to prevent further unstuffing |
|
581 andi x5, 0xBF ;[11] mark this bit as inverted (will be corrected before storing shift) |
|
582 in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
|
583 eor x1, x2 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
|
584 andi x1, USBMASK ;[2] mask the interesting bits |
|
585 breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong |
|
586 mov x1, x2 ;[4] the next bit expects the last state to be in x1 |
|
587 rjmp didunstuff6 ;[5] |
|
588 ;[6] jump delay of rjmp didunstuffX |
|
589 |
|
590 unstuff7: ;[7] this is the jump delay of rjmp unstuffX |
|
591 nop ;[8] |
|
592 nop ;[9] |
|
593 ori shift, 0x80 ;[10] invert the last received bit to prevent further unstuffing |
|
594 andi x5, 0x7F ;[11] mark this bit as inverted (will be corrected before storing shift) |
|
595 in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
|
596 eor x2, x1 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
|
597 andi x2, USBMASK ;[2] mask the interesting bits |
|
598 breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong |
|
599 mov x2, x1 ;[4] the next bit expects the last state to be in x2 |
|
600 rjmp didunstuff7 ;[5] |
|
601 ;[6] jump delay of rjmp didunstuff7 |
|
602 |
|
603 ; local copy of the stuffErr destination for the second half of the receiver loop |
|
604 stuffErr2: |
|
605 rjmp stuffErr |
|
606 |
|
607 ;-------------------------------------------------------------------------------------------------------------- |
|
608 ; The crc table follows. It has to be aligned to enable a fast loading of the needed bytes. |
|
609 ; There are two tables of 256 entries each, the low and the high byte table. |
|
610 ; Table values were generated with the following C code: |
|
611 /* |
|
612 #include <stdio.h> |
|
613 int main (int argc, char **argv) |
|
614 { |
|
615 int i, j; |
|
616 for (i=0; i<512; i++){ |
|
617 unsigned short crc = i & 0xff; |
|
618 for(j=0; j<8; j++) crc = (crc >> 1) ^ ((crc & 1) ? 0xa001 : 0); |
|
619 if((i & 7) == 0) printf("\n.byte "); |
|
620 printf("0x%02x, ", (i > 0xff ? (crc >> 8) : crc) & 0xff); |
|
621 if(i == 255) printf("\n"); |
|
622 } |
|
623 return 0; |
|
624 } |
|
625 |
|
626 // Use the following algorithm to compute CRC values: |
|
627 ushort computeCrc(uchar *msg, uchar msgLen) |
|
628 { |
|
629 uchar i; |
|
630 ushort crc = 0xffff; |
|
631 for(i = 0; i < msgLen; i++) |
|
632 crc = usbCrcTable16[lo8(crc) ^ msg[i]] ^ hi8(crc); |
|
633 return crc; |
|
634 } |
|
635 */ |
|
636 |
|
637 .balign 256 |
|
638 usbCrcTableLow: |
|
639 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
640 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
641 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
642 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
643 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
644 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
645 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
646 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
647 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
648 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
649 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
650 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
651 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
652 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
653 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
654 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
655 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
656 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
657 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
658 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
659 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
660 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
661 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
662 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
663 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
664 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
665 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
666 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
667 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
668 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
669 .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
|
670 .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
|
671 |
|
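672 ; .balign 256 |
; (The alignment directive above is left disabled: usbCrcTableLow is aligned to 256 and holds
; 32 rows of 8 bytes = 256 bytes, so usbCrcTableHigh already starts on a 256 byte boundary.
; The receive code indexes both tables by loading only ZH with hi8(table), so both tables must
; stay 256 byte aligned; keep usbCrcTableLow at exactly 256 entries or re-enable the directive.)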
|
673 usbCrcTableHigh: |
|
674 .byte 0x00, 0xC0, 0xC1, 0x01, 0xC3, 0x03, 0x02, 0xC2 |
|
675 .byte 0xC6, 0x06, 0x07, 0xC7, 0x05, 0xC5, 0xC4, 0x04 |
|
676 .byte 0xCC, 0x0C, 0x0D, 0xCD, 0x0F, 0xCF, 0xCE, 0x0E |
|
677 .byte 0x0A, 0xCA, 0xCB, 0x0B, 0xC9, 0x09, 0x08, 0xC8 |
|
678 .byte 0xD8, 0x18, 0x19, 0xD9, 0x1B, 0xDB, 0xDA, 0x1A |
|
679 .byte 0x1E, 0xDE, 0xDF, 0x1F, 0xDD, 0x1D, 0x1C, 0xDC |
|
680 .byte 0x14, 0xD4, 0xD5, 0x15, 0xD7, 0x17, 0x16, 0xD6 |
|
681 .byte 0xD2, 0x12, 0x13, 0xD3, 0x11, 0xD1, 0xD0, 0x10 |
|
682 .byte 0xF0, 0x30, 0x31, 0xF1, 0x33, 0xF3, 0xF2, 0x32 |
|
683 .byte 0x36, 0xF6, 0xF7, 0x37, 0xF5, 0x35, 0x34, 0xF4 |
|
684 .byte 0x3C, 0xFC, 0xFD, 0x3D, 0xFF, 0x3F, 0x3E, 0xFE |
|
685 .byte 0xFA, 0x3A, 0x3B, 0xFB, 0x39, 0xF9, 0xF8, 0x38 |
|
686 .byte 0x28, 0xE8, 0xE9, 0x29, 0xEB, 0x2B, 0x2A, 0xEA |
|
687 .byte 0xEE, 0x2E, 0x2F, 0xEF, 0x2D, 0xED, 0xEC, 0x2C |
|
688 .byte 0xE4, 0x24, 0x25, 0xE5, 0x27, 0xE7, 0xE6, 0x26 |
|
689 .byte 0x22, 0xE2, 0xE3, 0x23, 0xE1, 0x21, 0x20, 0xE0 |
|
690 .byte 0xA0, 0x60, 0x61, 0xA1, 0x63, 0xA3, 0xA2, 0x62 |
|
691 .byte 0x66, 0xA6, 0xA7, 0x67, 0xA5, 0x65, 0x64, 0xA4 |
|
692 .byte 0x6C, 0xAC, 0xAD, 0x6D, 0xAF, 0x6F, 0x6E, 0xAE |
|
693 .byte 0xAA, 0x6A, 0x6B, 0xAB, 0x69, 0xA9, 0xA8, 0x68 |
|
694 .byte 0x78, 0xB8, 0xB9, 0x79, 0xBB, 0x7B, 0x7A, 0xBA |
|
695 .byte 0xBE, 0x7E, 0x7F, 0xBF, 0x7D, 0xBD, 0xBC, 0x7C |
|
696 .byte 0xB4, 0x74, 0x75, 0xB5, 0x77, 0xB7, 0xB6, 0x76 |
|
697 .byte 0x72, 0xB2, 0xB3, 0x73, 0xB1, 0x71, 0x70, 0xB0 |
|
698 .byte 0x50, 0x90, 0x91, 0x51, 0x93, 0x53, 0x52, 0x92 |
|
699 .byte 0x96, 0x56, 0x57, 0x97, 0x55, 0x95, 0x94, 0x54 |
|
700 .byte 0x9C, 0x5C, 0x5D, 0x9D, 0x5F, 0x9F, 0x9E, 0x5E |
|
701 .byte 0x5A, 0x9A, 0x9B, 0x5B, 0x99, 0x59, 0x58, 0x98 |
|
702 .byte 0x88, 0x48, 0x49, 0x89, 0x4B, 0x8B, 0x8A, 0x4A |
|
703 .byte 0x4E, 0x8E, 0x8F, 0x4F, 0x8D, 0x4D, 0x4C, 0x8C |
|
704 .byte 0x44, 0x84, 0x85, 0x45, 0x87, 0x47, 0x46, 0x86 |
|
705 .byte 0x82, 0x42, 0x43, 0x83, 0x41, 0x81, 0x80, 0x40 |
|
706 |