Thu, 16 Feb 2017 14:50:25 +0100
Added schematics and PCB layout
0 | 1 | /* Name: usbdrvasm18.inc |
2 | * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers | |
3 | * Author: Lukas Schrittwieser (based on 20 MHz usbdrvasm20.inc by Jeroen Benschop) | |
4 | * Creation Date: 2009-01-20 | |
5 | * Tabsize: 4 | |
6 | * Copyright: (c) 2008 by Lukas Schrittwieser and OBJECTIVE DEVELOPMENT Software GmbH | |
7 | * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt) | |
8 | */ | |
9 | ||
10 | /* Do not link this file! Link usbdrvasm.S instead, which includes the | |
11 | * appropriate implementation! | |
12 | */ | |
13 | ||
14 | /* | |
15 | General Description: | |
16 | This file is the 18 MHz version of the assembler part of the USB driver. It | |
17 | requires a 18 MHz crystal (not a ceramic resonator and not a calibrated RC | |
18 | oscillator). | |
19 | ||
20 | See usbdrv.h for a description of the entire driver. | |
21 | ||
22 | Since almost all of this code is timing critical, don't change unless you | |
23 | really know what you are doing! Many parts require not only a maximum number | |
24 | of CPU cycles, but even an exact number of cycles! | |
25 | */ | |
26 | ||
27 | ||
28 | ;max stack usage: [ret(2), YL, SREG, YH, [sofError], bitcnt(x5), shift, x1, x2, x3, x4, cnt, ZL, ZH] = 14 bytes | |
29 | ;nominal frequency: 18 MHz -> 12 cycles per bit | |
30 | ; Numbers in brackets are clocks counted from center of last sync bit | |
31 | ; when instruction starts | |
32 | ;register use in receive loop to receive the data bytes: | |
33 | ; shift assembles the byte currently being received | |
34 | ; x1 holds the D+ and D- line state | |
35 | ; x2 holds the previous line state | |
36 | ; cnt holds the number of bytes left in the receive buffer | |
37 | ; x3 holds the higher crc byte (see algorithm below) | |
38 | ; x4 is used as temporary register for the crc algorithm | |
39 | ; x5 is used for unstuffing: when unstuffing, the last received bit is inverted in shift (to prevent further | |
40 | ; unstuffing calls). At the same time the corresponding bit in x5 is cleared to mark the bit as being inverted | |
41 | ; zl lower crc value and crc table index | |
42 | ; zh used for crc table accesses | |
43 | ||
44 | ;-------------------------------------------------------------------------------------------------------------- | |
45 | ; CRC mods: | |
46 | ; table driven crc checker, Z points to table in prog space | |
47 | ; ZL is the lower crc byte, x3 is the higher crc byte | |
48 | ; x4 is used as temp register to store different results | |
49 | ; the initialization of the crc register is not 0xFFFF but 0xFE54. This is because during the receipt of the | |
50 | ; first data byte a virtual zero data byte is added to the crc register; this results in the correct initial | |
51 | ; value of 0xFFFF at the beginning of the second data byte, before the first data byte is added to the crc. | |
52 | ; The magic number 0xFE54 results from the crc table: At tabH[0x54] = 0xFF = crcH (required) and | |
53 | ; tabL[0x54] = 0x01 -> crcL = 0x01 xor 0xFE = 0xFF | |
54 | ; bitcnt is renamed to x5 and is used for unstuffing purposes, the unstuffing works like in the 12MHz version | |
55 | ;-------------------------------------------------------------------------------------------------------------- | |
56 | ; CRC algorithm: | |
57 | ; The crc register is formed by x3 (higher byte) and ZL (lower byte). The algorithm uses a 'reversed' form, | |
58 | ; i.e. it takes the least significant bit first and shifts to the right. So in fact the highest order | |
59 | ; bit seen from the polynomial division point of view is the lsb of ZL. (If this sounds strange to you, I | |
60 | ; suggest some research on CRC :-) ) | |
61 | ; Each data byte received is xored to ZL, the lower crc byte. This byte now builds the crc | |
62 | ; table index. Next the new high byte is loaded from the table and stored in x4 until we have space in x3 | |
63 | ; (its destination). | |
64 | ; Afterwards the lower table is loaded from the table and stored in ZL (the old index is overwritten as | |
65 | ; we don't need it anymore. In fact this is a right shift by 8 bits.) Now the old crc high value is xored | |
66 | ; to ZL, this is the second shift of the old crc value. Now x4 (the temp reg) is moved to x3 and the crc | |
67 | ; calculation is done. | |
68 | ; Prior to the first byte the two CRC register have to be initialized to 0xFFFF (as defined in usb spec) | |
69 | ; however the crc engine also runs during the receipt of the first byte, therefore x3 and zl are initialized | |
70 | ; to a magic number which results in a crc value of 0xFFFF after the first complete byte. | |
71 | ; | |
72 | ; This algorithm is split into the extra cycles of the different bits: | |
73 | ; bit7: XOR the received byte to ZL | |
74 | ; bit5: load the new high byte to x4 | |
75 | ; bit6: load the lower xor byte from the table, xor zl and x3, store result in zl (=the new crc low value) | |
76 | ; move x4 (the new high byte) to x3, the crc value is ready | |
77 | ; | |
78 | ||
79 | ||
; POP_STANDARD: pops ZH, ZL, cnt, x5, x3, x2, x1, shift, x4 -- exactly the reverse of
; the order in which the interrupt entry code pushes them (x4 first, ZH last).
; Nine pops at two cycles each give the 18 cycles stated below.
80 | macro POP_STANDARD ; 18 cycles | |
81 | pop ZH |
82 | pop ZL |
83 | pop cnt |
84 | pop x5 |
85 | pop x3 |
86 | pop x2 |
87 | pop x1 |
88 | pop shift |
89 | pop x4 |
90 | endm |
; POP_RETI: undoes the first three pushes of USB_INTR_VECTOR (YL, SREG, YH).
; YH is popped first; the saved SREG value is popped into YL and written back to SREG,
; then YL itself is restored. The macro does not contain the reti instruction.
91 | macro POP_RETI ; 7 cycles | |
92 | pop YH |
93 | pop YL |
94 | out SREG, YL |
95 | pop YL |
96 | endm |
97 | ||
; CRC_CLEANUP_AND_CHECK: finishes the table-driven CRC for the last received byte and
; branches to ignorePacket unless the result in x2:ZL equals the residual 0xb001.
; In:  ZL = crc low byte already xored with the last data byte, x3 = crc high byte.
; Out: x2 = new crc high byte, ZL = new crc low byte; ZH is overwritten.
98 | macro CRC_CLEANUP_AND_CHECK |
99 | ; the last byte has already been xored with the lower crc byte, we have to do the table lookup and xor |
100 | ; x3 is the higher crc byte, zl the lower one |
101 | ldi ZH, hi8(usbCrcTableHigh);[+1] get the new high byte from the table |
102 | lpm x2, Z ;[+2][+3][+4] |
103 | ldi ZH, hi8(usbCrcTableLow);[+5] get the new low xor byte from the table |
104 | lpm ZL, Z ;[+6][+7][+8] |
105 | eor ZL, x3 ;[+9] xor the old high byte with the value from the table, x2:ZL now holds the crc value |
106 | cpi ZL, 0x01 ;[+10] if the crc is ok we have a fixed remainder value of 0xb001 in x2:ZL (see usb spec) |
107 | brne ignorePacket ;[+11] detected a crc fault -> packet is ignored and retransmitted by the host |
108 | cpi x2, 0xb0 ;[+12] |
109 | brne ignorePacket ;[+13] detected a crc fault -> packet is ignored and retransmitted by the host |
110 | endm |
111 | ||
112 | ||
; Interrupt entry: saves YL, SREG (via YL) and YH only, then falls through into the
; sync-pattern search below. The remaining registers are pushed later, while the
; sync pattern and the first PID bit are being received.
113 | USB_INTR_VECTOR: |
114 | ;order of registers pushed: YL, SREG, YH, [sofError], x4, shift, x1, x2, x3, x5, cnt, ZL, ZH |
115 | push YL ;[-28] push only what is necessary to sync with edge ASAP |
116 | in YL, SREG ;[-26] |
117 | push YL ;[-25] |
118 | push YH ;[-23] |
119 | ;---------------------------------------------------------------------------- | |
120 | ; Synchronize with sync pattern: | |
121 | ;---------------------------------------------------------------------------- | |
122 | ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K] | |
123 | ;sync up with J to K edge during sync pattern -- use fastest possible loops | |
124 | ;The first part waits at most 1 bit long since we must be in sync pattern. | |
125 | ;YL is guaranteed to be < 0x80 because I flag is clear. When we jump to |
126 | ;waitForJ, ensure that this prerequisite is met. | |
; Loops until D- reads 1 (J state). YL is incremented on every pass; sbis skips the
; brne once D- is set, and the brne itself falls through when YL wraps to zero, so the
; loop always terminates.
127 | waitForJ: |
128 | inc YL |
129 | sbis USBIN, USBMINUS |
130 | brne waitForJ ; just make sure we have ANY timeout |
; Polls D- nine times in an unrolled sbis/rjmp sequence and jumps to foundK as soon as
; D- reads 0 (K state). If no K is seen, the optional SOF counter / SOF hook code runs
; (when enabled by the preprocessor) and control leaves via sofError.
131 | waitForK: |
132 | ;The following code results in a sampling window of < 1/4 bit which meets the spec. |
133 | sbis USBIN, USBMINUS ;[-17] |
134 | rjmp foundK ;[-16] |
135 | sbis USBIN, USBMINUS |
136 | rjmp foundK |
137 | sbis USBIN, USBMINUS |
138 | rjmp foundK |
139 | sbis USBIN, USBMINUS |
140 | rjmp foundK |
141 | sbis USBIN, USBMINUS |
142 | rjmp foundK |
143 | sbis USBIN, USBMINUS |
144 | rjmp foundK |
145 | sbis USBIN, USBMINUS |
146 | rjmp foundK |
147 | sbis USBIN, USBMINUS |
148 | rjmp foundK |
149 | sbis USBIN, USBMINUS |
150 | rjmp foundK |
151 | #if USB_COUNT_SOF |
152 | lds YL, usbSofCount |
153 | inc YL |
154 | sts usbSofCount, YL |
155 | #endif /* USB_COUNT_SOF */ |
156 | #ifdef USB_SOF_HOOK |
157 | USB_SOF_HOOK |
158 | #endif |
159 | rjmp sofError |
; A K state was seen. Uses one bit time to push x4 and shift, point Y at the receive
; buffer (usbRxBuf + usbInputBufOffset) and preload shift with 0x80, then samples D-
; again: if it is still K this is the double-K end of sync (haveTwoBitsK); otherwise
; the two pushes are undone and the search resumes at waitForK.
160 | foundK: ;[-15] |
161 | ;{3, 5} after falling D- edge, average delay: 4 cycles |
162 | ;bit0 should be at 30 (2.5 bits) for center sampling. Currently at 4 so 26 cycles till bit 0 sample |
163 | ;use 1 bit time for setup purposes, then sample again. Numbers in brackets |
164 | ;are cycles from center of first sync (double K) bit after the instruction |
165 | push x4 ;[-14] |
166 | ; [---] ;[-13] |
167 | lds YL, usbInputBufOffset;[-12] used to toggle the two usb receive buffers |
168 | ; [---] ;[-11] |
169 | clr YH ;[-10] |
170 | subi YL, lo8(-(usbRxBuf));[-9] [rx loop init] |
171 | sbci YH, hi8(-(usbRxBuf));[-8] [rx loop init] |
172 | push shift ;[-7] |
173 | ; [---] ;[-6] |
174 | ldi shift, 0x80 ;[-5] the last bit is the end of byte marker for the pid receiver loop |
175 | clc ;[-4] the carry has to be clear for receipt of pid bit 0 |
176 | sbis USBIN, USBMINUS ;[-3] we want two bits K (sample 3 cycles too early) |
177 | rjmp haveTwoBitsK ;[-2] |
178 | pop shift ;[-1] undo the push from before |
179 | pop x4 ;[1] |
180 | rjmp waitForK ;[3] this was not the end of sync, retry |
181 | ; The entire loop from waitForK until rjmp waitForK above must not exceed two | |
182 | ; bit times (= 24 cycles). | |
183 | ||
184 | ;---------------------------------------------------------------------------- | |
185 | ; push more registers and initialize values while we sample the first bits: | |
186 | ;---------------------------------------------------------------------------- | |
; End of sync detected. Pushes x1, x2, x3, x5 and cnt, sets x2 to the K line state and
; cnt to USB_BUFSIZE, then falls through into the reception of PID bit 0, whose spare
; cycles are used to push ZL/ZH and to load the crc high byte seed (0xFE) into x3.
187 | haveTwoBitsK: |
188 | push x1 ;[0] |
189 | push x2 ;[2] |
190 | push x3 ;[4] crc high byte |
191 | ldi x2, 1<<USBPLUS ;[6] [rx loop init] current line state is K state. D+=="1", D-=="0" |
192 | push x5 ;[7] |
193 | push cnt ;[9] |
194 | ldi cnt, USB_BUFSIZE ;[11] |
195 | ||
196 | ||
197 | ;-------------------------------------------------------------------------------------------------------------- |
198 | ; receives the pid byte |
199 | ; there is no real unstuffing algorithm implemented here as a stuffing bit is impossible in the pid byte. |
200 | ; That's because the last four bits of the byte are the inverted of the first four bits. If we detect a |
201 | ; unstuffing condition something went wrong and abort |
202 | ; shift has to be initialized to 0x80 |
203 | ;-------------------------------------------------------------------------------------------------------------- |
204 | ||
205 | ; pid bit 0 - used for even more register saving (we need the z pointer) |
206 | in x1, USBIN ;[0] sample line state |
207 | andi x1, USBMASK ;[1] filter only D+ and D- bits |
208 | eor x2, x1 ;[2] generate inverted of actual bit |
209 | sbrc x2, USBMINUS ;[3] if the bit is set we received a zero |
210 | sec ;[4] |
211 | ror shift ;[5] we perform no unstuffing check here as this is the first bit |
212 | mov x2, x1 ;[6] |
213 | push ZL ;[7] |
214 | ;[8] |
215 | push ZH ;[9] |
216 | ;[10] |
217 | ldi x3, 0xFE ;[11] x3 is the high order crc value |
218 | ||
219 | ||
; Receives PID bits 1..7, one bit per 12-cycle pass. The 0x80 marker preloaded into
; shift drops out into the carry after the last bit and ends the loop. The spare cycles
; load the crc low byte seed (0x54) into ZL; on exit x4 = shift xor 0xFF.
220 | bitloopPid: |
221 | in x1, USBIN ;[0] sample line state |
222 | andi x1, USBMASK ;[1] filter only D+ and D- bits |
223 | breq nse0 ;[2] both lines are low so handle se0 |
224 | eor x2, x1 ;[3] generate inverted of actual bit |
225 | sbrc x2, USBMINUS ;[4] set the carry if we received a zero |
226 | sec ;[5] |
227 | ror shift ;[6] |
228 | ldi ZL, 0x54 ;[7] ZL is the low order crc value |
229 | ser x4 ;[8] there is no bit stuffing check here as the pid bit can't be stuffed. if so |
230 | ; some error occurred. In this case the packet is discarded later on anyway. |
231 | mov x2, x1 ;[9] prepare for the next cycle |
232 | brcc bitloopPid ;[10] while 0s drop out of shift we get the next bit |
233 | eor x4, shift ;[11] invert all bits in shift and store result in x4 |
234 | ||
235 | ;-------------------------------------------------------------------------------------------------------------- | |
236 | ; receives data bytes and calculates the crc | |
237 | ; the last USBIN state has to be in x2 | |
238 | ; this is only the first half, due to branch distance limitations the second half of the loop is near the end |
239 | ; of this asm file | |
240 | ;-------------------------------------------------------------------------------------------------------------- | |
241 | ||
; First half of the data byte receive loop (bits 0..3), 12 cycles per bit.
; For each bit the line state is sampled, xored with the previous line state and the
; USBMINUS bit of the result is copied into the matching bit of shift. A masked copy of
; shift is then tested; if it is zero the code branches to unstuffN.
; x1 and x2 alternate between "current" and "previous" line state from bit to bit.
; During bit 0 the previously completed byte (x4) is stored through Y and cnt is
; decremented; an underflow of cnt aborts the packet via nOverflow.
242 | rxDataStart: |
243 | in x1, USBIN ;[0] sample line state (note: a se0 check is not useful due to bit dribbling) |
244 | ser x5 ;[1] prepare the unstuff marker register |
245 | eor x2, x1 ;[2] generates the inverted of the actual bit |
246 | bst x2, USBMINUS ;[3] copy the bit from x2 |
247 | bld shift, 0 ;[4] and store it in shift |
248 | mov x2, shift ;[5] make a copy of shift for unstuffing check |
249 | andi x2, 0xF9 ;[6] mask the last six bits, if we got six zeros (which are six ones in fact) |
250 | breq unstuff0 ;[7] then Z is set now and we branch to the unstuffing handler |
251 | didunstuff0: |
252 | subi cnt, 1 ;[8] cannot use dec because it doesn't affect the carry flag |
253 | brcs nOverflow ;[9] Too many bytes received. Ignore packet |
254 | st Y+, x4 ;[10] store the last received byte |
255 | ;[11] st needs two cycles |
256 | ||
257 | ; bit1 |
258 | in x2, USBIN ;[0] sample line state |
259 | andi x1, USBMASK ;[1] check for se0 during bit 0 |
260 | breq nse0 ;[2] |
261 | andi x2, USBMASK ;[3] check se0 during bit 1 |
262 | breq nse0 ;[4] |
263 | eor x1, x2 ;[5] |
264 | bst x1, USBMINUS ;[6] |
265 | bld shift, 1 ;[7] |
266 | mov x1, shift ;[8] |
267 | andi x1, 0xF3 ;[9] |
268 | breq unstuff1 ;[10] |
269 | didunstuff1: |
270 | nop ;[11] |
271 | ||
272 | ; bit2 |
273 | in x1, USBIN ;[0] sample line state |
274 | andi x1, USBMASK ;[1] check for se0 (as there is nothing else to do here) |
275 | breq nOverflow ;[2] |
276 | eor x2, x1 ;[3] generates the inverted of the actual bit |
277 | bst x2, USBMINUS ;[4] |
278 | bld shift, 2 ;[5] store the bit |
279 | mov x2, shift ;[6] |
280 | andi x2, 0xE7 ;[7] if we have six zeros here (which means six 1 in the stream) |
281 | breq unstuff2 ;[8] the next bit is a stuffing bit |
282 | didunstuff2: |
283 | nop2 ;[9] |
284 | ;[10] |
285 | nop ;[11] |
286 | ||
287 | ; bit3 |
288 | in x2, USBIN ;[0] sample line state |
289 | andi x2, USBMASK ;[1] check for se0 |
290 | breq nOverflow ;[2] |
291 | eor x1, x2 ;[3] |
292 | bst x1, USBMINUS ;[4] |
293 | bld shift, 3 ;[5] |
294 | mov x1, shift ;[6] |
295 | andi x1, 0xCF ;[7] |
296 | breq unstuff3 ;[8] |
297 | didunstuff3: |
298 | nop ;[9] |
299 | rjmp rxDataBit4 ;[10] |
300 | ;[11] |
301 | ||
302 | ; the avr branch instructions allow an offset of +63 instructions only, so we need this |
303 | ; 'local copy' of se0 | |
; Trampolines for the first half of the receiver: targets placed within range of the
; conditional branches above, each forwarding with rjmp to the real handler
; (se0 / overflow). nOverflow and stuffErr share a single rjmp.
304 | nse0: |
305 | rjmp se0 ;[4] |
306 | ;[5] |
307 | ; the same as for se0 is needed for overflow and StuffErr |
308 | nOverflow: |
309 | stuffErr: |
310 | rjmp overflow |
311 | ||
312 | ||
; Unstuff handler for bit 0. First checks for SE0 in the sample just taken and, if
; found, returns to didunstuff0 without unstuffing. Otherwise it sets bit 0 of shift,
; clears bit 0 of x5 as the "inverted" marker, samples the stuff bit one bit time later
; and jumps to stuffErr if the line state did not change. Returns to didunstuff0 with
; the new line state in x1.
313 | unstuff0: ;[8] this is the branch delay of breq unstuffX |
314 | andi x1, USBMASK ;[9] do an se0 check here (if the last crc byte ends with 5 one's we might end up here |
315 | breq didunstuff0 ;[10] even though the message is complete -> jump back and store the byte |
316 | ori shift, 0x01 ;[11] invert the last received bit to prevent further unstuffing |
317 | in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
318 | andi x5, 0xFE ;[1] mark this bit as inverted (will be corrected before storing shift) |
319 | eor x1, x2 ;[2] x1 and x2 have to be different because the stuff bit is always a zero |
320 | andi x1, USBMASK ;[3] mask the interesting bits |
321 | breq stuffErr ;[4] if the stuff bit is a 1-bit something went wrong |
322 | mov x1, x2 ;[5] the next bit expects the last state to be in x1 |
323 | rjmp didunstuff0 ;[6] |
324 | ;[7] jump delay of rjmp didunstuffX |
325 | ||
; Unstuff handler for bit 1: samples the stuff bit, sets bit 1 of shift, clears bit 1
; of x5 as the "inverted" marker and jumps to stuffErr if the line state did not change.
; Returns to didunstuff1 exactly one bit time later with the new line state in x2.
326 | unstuff1: ;[11] this is the jump delay of breq unstuffX |
327 | in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
328 | ori shift, 0x02 ;[1] invert the last received bit to prevent further unstuffing |
329 | andi x5, 0xFD ;[2] mark this bit as inverted (will be corrected before storing shift) |
330 | eor x2, x1 ;[3] x1 and x2 have to be different because the stuff bit is always a zero |
331 | andi x2, USBMASK ;[4] mask the interesting bits |
332 | breq stuffErr ;[5] if the stuff bit is a 1-bit something went wrong |
333 | mov x2, x1 ;[6] the next bit expects the last state to be in x2 |
334 | nop2 ;[7] |
335 | ;[8] |
336 | rjmp didunstuff1 ;[9] |
337 | ;[10] jump delay of rjmp didunstuffX |
338 | ||
; Unstuff handler for bit 2: sets bit 2 of shift, clears bit 2 of x5 as the "inverted"
; marker, samples the stuff bit and jumps to stuffErr if the line state did not change.
; Returns to didunstuff2 exactly one bit time later with the new line state in x1.
339 | unstuff2: ;[9] this is the jump delay of breq unstuffX |
340 | ori shift, 0x04 ;[10] invert the last received bit to prevent further unstuffing |
341 | andi x5, 0xFB ;[11] mark this bit as inverted (will be corrected before storing shift) |
342 | in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
343 | eor x1, x2 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
344 | andi x1, USBMASK ;[2] mask the interesting bits |
345 | breq stuffErr ;[3] if the stuff bit is a 1-bit something went wrong |
346 | mov x1, x2 ;[4] the next bit expects the last state to be in x1 |
347 | nop2 ;[5] |
348 | ;[6] |
349 | rjmp didunstuff2 ;[7] |
350 | ;[8] jump delay of rjmp didunstuffX |
351 | ||
; Unstuff handler for bit 3: sets bit 3 of shift, clears bit 3 of x5 as the "inverted"
; marker, samples the stuff bit and jumps to stuffErr if the line state did not change.
; Returns to didunstuff3 exactly one bit time later with the new line state in x2.
352 | unstuff3: ;[9] this is the jump delay of breq unstuffX |
353 | ori shift, 0x08 ;[10] invert the last received bit to prevent further unstuffing |
354 | andi x5, 0xF7 ;[11] mark this bit as inverted (will be corrected before storing shift) |
355 | in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
356 | eor x2, x1 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
357 | andi x2, USBMASK ;[2] mask the interesting bits |
358 | breq stuffErr ;[3] if the stuff bit is a 1-bit something went wrong |
359 | mov x2, x1 ;[4] the next bit expects the last state to be in x2 |
360 | nop2 ;[5] |
361 | ;[6] |
362 | rjmp didunstuff3 ;[7] |
363 | ;[8] jump delay of rjmp didunstuffX |
364 | ||
365 | ||
366 | ||
367 | ; the include has to be here due to branch distance restrictions |
368 | #define __USE_CRC__ | |
369 | #include "asmcommon.inc" | |
370 | ||
371 | ||
372 | ||
373 | ; USB spec says: | |
374 | ; idle = J | |
375 | ; J = (D+ = 0), (D- = 1) | |
376 | ; K = (D+ = 1), (D- = 0) | |
377 | ; Spec allows 7.5 bit times from EOP to SOP for replies | |
378 | ; 7.5 bit times is 90 cycles. ...there is plenty of time | |
379 | ||
380 | ||
; Handshake entry points. Each one places the PID byte in x3 and then sets Y to 20
; (the comment below states that x3 is r20, so Y addresses the register itself) and
; cnt to 2 before falling through into usbSendAndReti:
;   sendNakAndReti  - x3 = USBPID_NAK
;   sendAckAndReti  - cnt = USBPID_ACK, then as sendCntAndReti
;   sendCntAndReti  - x3 = cnt
;   sendX3AndReti   - sends whatever is already in x3
381 | sendNakAndReti: |
382 | ldi x3, USBPID_NAK ;[-18] |
383 | rjmp sendX3AndReti ;[-17] |
384 | sendAckAndReti: |
385 | ldi cnt, USBPID_ACK ;[-17] |
386 | sendCntAndReti: |
387 | mov x3, cnt ;[-16] |
388 | sendX3AndReti: |
389 | ldi YL, 20 ;[-15] x3==r20 address is 20 |
390 | ldi YH, 0 ;[-14] |
391 | ldi cnt, 2 ;[-13] |
392 | ; rjmp usbSendAndReti fallthrough |
393 | ||
394 | ;usbSend: | |
395 | ;pointer to data in 'Y' | |
396 | ;number of bytes in 'cnt' -- including sync byte [range 2 ... 12] | |
397 | ;uses: x1...x4, btcnt, shift, cnt, Y | |
398 | ;Numbers in brackets are time since first bit of sync pattern is sent | |
399 | ||
; Transmits cnt bytes: the sync byte 0x80 first, then the bytes read through Y.
; Sequence: drive both USB pins as outputs, send each byte LSb first with one 'out'
; every 12 cycles (bits 0..6 in txBitLoop, bit 7 separately so the next byte can be
; loaded), inserting a stuff bit via bitstuffN / bitstuff7 after six consecutive ones.
; After the last byte: SE0 for two bit times, one cycle of J, release the bus, copy
; usbNewDeviceAddr (shifted left) to usbDeviceAddr unless Y ends at 22 (the x3
; handshake case), clear the pending interrupt flag and jump to doReturn.
; x2 holds the bit-stuffing history, x1 mirrors the output port, x4 is the toggle mask.
400 | usbSendAndReti: ; 12 cycles until SOP |
401 | in x2, USBDDR ;[-12] |
402 | ori x2, USBMASK ;[-11] |
403 | sbi USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups) |
404 | in x1, USBOUT ;[-8] port mirror for tx loop |
405 | out USBDDR, x2 ;[-7] <- acquire bus |
406 | ldi x2, 0 ;[-6] init x2 (bitstuff history) because sync starts with 0 |
407 | ldi x4, USBMASK ;[-5] exor mask |
408 | ldi shift, 0x80 ;[-4] sync byte is first byte sent |
409 | txByteLoop: |
410 | ldi bitcnt, 0x40 ;[-3]=[9] binary 01000000 |
411 | txBitLoop: ; the loop sends the first 7 bits of the byte |
412 | sbrs shift, 0 ;[-2]=[10] if we have to send a 1 don't change the line state |
413 | eor x1, x4 ;[-1]=[11] |
414 | out USBOUT, x1 ;[0] |
415 | ror shift ;[1] |
416 | ror x2 ;[2] transfers the last sent bit to the stuffing history |
417 | didStuffN: |
418 | nop ;[3] |
419 | nop ;[4] |
420 | cpi x2, 0xfc ;[5] if we sent six consecutive ones |
421 | brcc bitstuffN ;[6] |
422 | lsr bitcnt ;[7] |
423 | brne txBitLoop ;[8] restart the loop while the 1 is still in the bitcount |
424 | ||
425 | ; transmit bit 7 |
426 | sbrs shift, 0 ;[9] |
427 | eor x1, x4 ;[10] |
428 | didStuff7: |
429 | ror shift ;[11] |
430 | out USBOUT, x1 ;[0] transfer bit 7 to the pins |
431 | ror x2 ;[1] move the bit into the stuffing history |
432 | cpi x2, 0xfc ;[2] |
433 | brcc bitstuff7 ;[3] |
434 | ld shift, y+ ;[4] get next byte to transmit (ld takes two cycles) |
435 | dec cnt ;[6] decrement byte counter |
436 | brne txByteLoop ;[7] if we have more bytes start next one |
437 | ;[8] branch delay |
438 | ||
439 | ;make SE0: |
440 | cbr x1, USBMASK ;[8] prepare SE0 [spec says EOP may be 25 to 30 cycles] |
441 | lds x2, usbNewDeviceAddr;[9] |
442 | lsl x2 ;[11] we compare with left shifted address |
443 | out USBOUT, x1 ;[0] <-- out SE0 -- from now 2 bits = 24 cycles until bus idle |
444 | subi YL, 20 + 2 ;[1] Only assign address on data packets, not ACK/NAK in x3 |
445 | sbci YH, 0 ;[2] |
446 | ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm: |
447 | ;set address only after data packet was sent, not after handshake |
448 | breq skipAddrAssign ;[3] |
449 | sts usbDeviceAddr, x2 ; if not skipped: SE0 is one cycle longer |
450 | skipAddrAssign: |
451 | ;end of usbDeviceAddress transfer |
452 | ldi x2, 1<<USB_INTR_PENDING_BIT;[5] int0 occurred during TX -- clear pending flag |
453 | USB_STORE_PENDING(x2) ;[6] |
454 | ori x1, USBIDLE ;[7] |
455 | in x2, USBDDR ;[8] |
456 | cbr x2, USBMASK ;[9] set both pins to input |
457 | mov x3, x1 ;[10] |
458 | cbr x3, USBMASK ;[11] configure no pullup on both pins |
459 | ldi x4, 4 ;[12] |
460 | se0Delay: |
461 | dec x4 ;[13] [16] [19] [22] |
462 | brne se0Delay ;[14] [17] [20] [23] |
463 | out USBOUT, x1 ;[24] <-- out J (idle) -- end of SE0 (EOP signal) |
464 | out USBDDR, x2 ;[25] <-- release bus now |
465 | out USBOUT, x3 ;[26] <-- ensure no pull-up resistors are active |
466 | rjmp doReturn |
467 | ||
; Stuff-bit insertion for bits 0..6 of the transmit loop: toggles the line (a zero bit),
; clears the stuffing history in x2, outputs the stuff bit on the next bit boundary and
; re-enters the loop at didStuffN. shift and bitcnt are left untouched, so the data bit
; count is not advanced by the stuff bit.
468 | bitstuffN: |
469 | eor x1, x4 ;[8] generate a zero |
470 | ldi x2, 0 ;[9] reset the bit stuffing history |
471 | nop2 ;[10] |
472 | out USBOUT, x1 ;[0] <-- send the stuffing bit |
473 | rjmp didStuffN ;[1] |
474 | ||
; Stuff-bit insertion after bit 7: toggles the line (a zero bit), clears the stuffing
; history in x2 and re-enters at didStuff7, which performs the 'out'. The clc/rol pair
; undoes in advance the 'ror shift' that is executed again at didStuff7.
475 | bitstuff7: |
476 | eor x1, x4 ;[5] |
477 | ldi x2, 0 ;[6] reset bit stuffing history |
478 | clc ;[7] fill a zero into the shift register |
479 | rol shift ;[8] compensate for ror shift at branch destination |
480 | rjmp didStuff7 ;[9] |
481 | ;[10] jump delay |
482 | ||
483 | ;-------------------------------------------------------------------------------------------------------------- | |
484 | ; receives data bytes and calculates the crc | |
485 | ; second half of the data byte receiver loop | |
486 | ; most parts of the crc algorithm are here | |
487 | ;-------------------------------------------------------------------------------------------------------------- | |
488 | ||
; Trampoline to 'overflow' for the second half of the receiver loop; it is the target
; of the conditional branch in rxDataBit4 below.
489 | nOverflow2: |
490 | rjmp overflow |
491 | ||
; Second half of the data byte receive loop (bits 4..7), 12 cycles per bit.
; Bit handling is the same as in the first half (sample, xor with previous state, copy
; the USBMINUS bit into shift, branch to unstuffN if the masked history is zero).
; The spare cycles carry the crc update for the previous byte:
;   bit 5: x4 = usbCrcTableHigh[ZL]
;   bit 6: ZL = usbCrcTableLow[ZL] xor x3, then x3 = x4
;   bit 7: x4 = x5 = x5 xor shift (the completed byte), ZL ^= x4
; Control then returns to rxDataStart, where x4 is stored during bit 0 of the next byte.
492 | rxDataBit4: |
493 | in x1, USBIN ;[0] sample line state |
494 | andi x1, USBMASK ;[1] check for se0 |
495 | breq nOverflow2 ;[2] |
496 | eor x2, x1 ;[3] |
497 | bst x2, USBMINUS ;[4] |
498 | bld shift, 4 ;[5] |
499 | mov x2, shift ;[6] |
500 | andi x2, 0x9F ;[7] |
501 | breq unstuff4 ;[8] |
502 | didunstuff4: |
503 | nop2 ;[9][10] |
504 | nop ;[11] |
505 | ||
506 | ; bit5 |
507 | in x2, USBIN ;[0] sample line state |
508 | ldi ZH, hi8(usbCrcTableHigh);[1] use the table for the higher byte |
509 | eor x1, x2 ;[2] |
510 | bst x1, USBMINUS ;[3] |
511 | bld shift, 5 ;[4] |
512 | mov x1, shift ;[5] |
513 | andi x1, 0x3F ;[6] |
514 | breq unstuff5 ;[7] |
515 | didunstuff5: |
516 | lpm x4, Z ;[8] load the higher crc xor-byte and store it for later use |
517 | ;[9] lpm needs 3 cycles |
518 | ;[10] |
519 | ldi ZH, hi8(usbCrcTableLow);[11] load the lower crc xor byte address |
520 | ||
521 | ; bit6 |
522 | in x1, USBIN ;[0] sample line state |
523 | eor x2, x1 ;[1] |
524 | bst x2, USBMINUS ;[2] |
525 | bld shift, 6 ;[3] |
526 | mov x2, shift ;[4] |
527 | andi x2, 0x7E ;[5] |
528 | breq unstuff6 ;[6] |
529 | didunstuff6: |
530 | lpm ZL, Z ;[7] load the lower xor crc byte |
531 | ;[8] lpm needs 3 cycles |
532 | ;[9] |
533 | eor ZL, x3 ;[10] xor the old high crc byte with the low xor-byte |
534 | mov x3, x4 ;[11] move the new high order crc value from temp to its destination |
535 | ||
536 | ; bit7 |
537 | in x2, USBIN ;[0] sample line state |
538 | eor x1, x2 ;[1] |
539 | bst x1, USBMINUS ;[2] |
540 | bld shift, 7 ;[3] now shift holds the complete but inverted data byte |
541 | mov x1, shift ;[4] |
542 | andi x1, 0xFC ;[5] |
543 | breq unstuff7 ;[6] |
544 | didunstuff7: |
545 | eor x5, shift ;[7] x5 marks all bits which have not been inverted by the unstuffing subs |
546 | mov x4, x5 ;[8] keep a copy of the data byte it will be stored during next bit0 |
547 | eor ZL, x4 ;[9] feed the actual byte into the crc algorithm |
548 | rjmp rxDataStart ;[10] next byte |
549 | ;[11] during the reception of the next byte this one will be fed into the crc algorithm |
550 | ||
; Unstuff handler for bit 4: sets bit 4 of shift, clears bit 4 of x5 as the "inverted"
; marker, samples the stuff bit and jumps to stuffErr2 if the line state did not change.
; Returns to didunstuff4 exactly one bit time later with the new line state in x1.
551 | unstuff4: ;[9] this is the jump delay of rjmp unstuffX |
552 | ori shift, 0x10 ;[10] invert the last received bit to prevent further unstuffing |
553 | andi x5, 0xEF ;[11] mark this bit as inverted (will be corrected before storing shift) |
554 | in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
555 | eor x1, x2 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
556 | andi x1, USBMASK ;[2] mask the interesting bits |
557 | breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong |
558 | mov x1, x2 ;[4] the next bit expects the last state to be in x1 |
559 | nop2 ;[5] |
560 | ;[6] |
561 | rjmp didunstuff4 ;[7] |
562 | ;[8] jump delay of rjmp didunstuffX |
563 | ||
; Unstuff handler for bit 5: sets bit 5 of shift, clears bit 5 of x5 as the "inverted"
; marker, samples the stuff bit and jumps to stuffErr2 if the line state did not change.
; Returns to didunstuff5 exactly one bit time later with the new line state in x2.
564 | unstuff5: ;[8] this is the jump delay of rjmp unstuffX |
565 | nop ;[9] |
566 | ori shift, 0x20 ;[10] invert the last received bit to prevent further unstuffing |
567 | andi x5, 0xDF ;[11] mark this bit as inverted (will be corrected before storing shift) |
568 | in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
569 | eor x2, x1 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
570 | andi x2, USBMASK ;[2] mask the interesting bits |
571 | breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong |
572 | mov x2, x1 ;[4] the next bit expects the last state to be in x2 |
573 | nop ;[5] |
574 | rjmp didunstuff5 ;[6] |
575 | ;[7] jump delay of rjmp didunstuffX |
576 | ||
; Unstuff handler for bit 6: sets bit 6 of shift, clears bit 6 of x5 as the "inverted"
; marker, samples the stuff bit and jumps to stuffErr2 if the line state did not change.
; Returns to didunstuff6 exactly one bit time later with the new line state in x1.
577 | unstuff6: ;[7] this is the jump delay of rjmp unstuffX |
578 | nop2 ;[8] |
579 | ;[9] |
580 | ori shift, 0x40 ;[10] invert the last received bit to prevent further unstuffing |
581 | andi x5, 0xBF ;[11] mark this bit as inverted (will be corrected before storing shift) |
582 | in x2, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
583 | eor x1, x2 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
584 | andi x1, USBMASK ;[2] mask the interesting bits |
585 | breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong |
586 | mov x1, x2 ;[4] the next bit expects the last state to be in x1 |
587 | rjmp didunstuff6 ;[5] |
588 | ;[6] jump delay of rjmp didunstuffX |
589 | ||
; Unstuff handler for bit 7: sets bit 7 of shift, clears bit 7 of x5 as the "inverted"
; marker, samples the stuff bit and jumps to stuffErr2 if the line state did not change.
; Returns to didunstuff7 exactly one bit time later with the new line state in x2.
590 | unstuff7: ;[7] this is the jump delay of rjmp unstuffX |
591 | nop ;[8] |
592 | nop ;[9] |
593 | ori shift, 0x80 ;[10] invert the last received bit to prevent further unstuffing |
594 | andi x5, 0x7F ;[11] mark this bit as inverted (will be corrected before storing shift) |
595 | in x1, USBIN ;[0] we have some free cycles so we could check for bit stuffing errors |
596 | eor x2, x1 ;[1] x1 and x2 have to be different because the stuff bit is always a zero |
597 | andi x2, USBMASK ;[2] mask the interesting bits |
598 | breq stuffErr2 ;[3] if the stuff bit is a 1-bit something went wrong |
599 | mov x2, x1 ;[4] the next bit expects the last state to be in x2 |
600 | rjmp didunstuff7 ;[5] |
601 | ;[6] jump delay of rjmp didunstuff7 |
602 | ||
603 | ; local copy of the stuffErr destination for the second half of the receiver loop |
; Trampoline: branch target for unstuff4..unstuff7, forwarding to stuffErr with rjmp.
604 | stuffErr2: |
605 | rjmp stuffErr |
606 | ||
607 | ;-------------------------------------------------------------------------------------------------------------- | |
608 | ; The crc table follows. It has to be aligned to enable a fast loading of the needed bytes. | |
609 | ; There are two tables of 256 entries each, the low and the high byte table. | |
610 | ; Table values were generated with the following C code: | |
611 | /* | |
612 | #include <stdio.h> | |
613 | int main (int argc, char **argv) | |
614 | { | |
615 | int i, j; | |
616 | for (i=0; i<512; i++){ | |
617 | unsigned short crc = i & 0xff; | |
618 | for(j=0; j<8; j++) crc = (crc >> 1) ^ ((crc & 1) ? 0xa001 : 0); | |
619 | if((i & 7) == 0) printf("\n.byte "); | |
620 | printf("0x%02x, ", (i > 0xff ? (crc >> 8) : crc) & 0xff); | |
621 | if(i == 255) printf("\n"); | |
622 | } | |
623 | return 0; | |
624 | } | |
625 | ||
626 | // Use the following algorithm to compute CRC values: | |
627 | ushort computeCrc(uchar *msg, uchar msgLen) | |
628 | { | |
629 | uchar i; | |
630 | ushort crc = 0xffff; | |
631 | for(i = 0; i < msgLen; i++) | |
632 | crc = usbCrcTable16[lo8(crc) ^ msg[i]] ^ hi8(crc); | |
633 | return crc; | |
634 | } | |
635 | */ | |
636 | ||
; Low-byte CRC table: 256 entries, indexed by ZL. The receiver loads only ZH with
; hi8(usbCrcTableLow) and uses ZL as the index, so the table has to start on a
; 256-byte boundary -- hence the .balign below.
637 | .balign 256 |
638 | usbCrcTableLow: |
639 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
640 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
641 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
642 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
643 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
644 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
645 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
646 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
647 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
648 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
649 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
650 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
651 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
652 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
653 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
654 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
655 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
656 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
657 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
658 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
659 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
660 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
661 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
662 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
663 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
664 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
665 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
666 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
667 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
668 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
669 | .byte 0x00, 0xC1, 0x81, 0x40, 0x01, 0xC0, 0x80, 0x41 |
670 | .byte 0x01, 0xC0, 0x80, 0x41, 0x00, 0xC1, 0x81, 0x40 |
671 | ||
; High-byte CRC table: 256 entries, indexed by ZL with ZH = hi8(usbCrcTableHigh).
; It needs the same 256-byte alignment as usbCrcTableLow. No explicit directive is
; required as long as it directly follows usbCrcTableLow, which is aligned to 256 and
; holds exactly 256 bytes (32 rows of 8) -- do not insert anything between the tables.
672 | ; .balign 256 |
673 | usbCrcTableHigh: |
674 | .byte 0x00, 0xC0, 0xC1, 0x01, 0xC3, 0x03, 0x02, 0xC2 |
675 | .byte 0xC6, 0x06, 0x07, 0xC7, 0x05, 0xC5, 0xC4, 0x04 |
676 | .byte 0xCC, 0x0C, 0x0D, 0xCD, 0x0F, 0xCF, 0xCE, 0x0E |
677 | .byte 0x0A, 0xCA, 0xCB, 0x0B, 0xC9, 0x09, 0x08, 0xC8 |
678 | .byte 0xD8, 0x18, 0x19, 0xD9, 0x1B, 0xDB, 0xDA, 0x1A |
679 | .byte 0x1E, 0xDE, 0xDF, 0x1F, 0xDD, 0x1D, 0x1C, 0xDC |
680 | .byte 0x14, 0xD4, 0xD5, 0x15, 0xD7, 0x17, 0x16, 0xD6 |
681 | .byte 0xD2, 0x12, 0x13, 0xD3, 0x11, 0xD1, 0xD0, 0x10 |
682 | .byte 0xF0, 0x30, 0x31, 0xF1, 0x33, 0xF3, 0xF2, 0x32 |
683 | .byte 0x36, 0xF6, 0xF7, 0x37, 0xF5, 0x35, 0x34, 0xF4 |
684 | .byte 0x3C, 0xFC, 0xFD, 0x3D, 0xFF, 0x3F, 0x3E, 0xFE |
685 | .byte 0xFA, 0x3A, 0x3B, 0xFB, 0x39, 0xF9, 0xF8, 0x38 |
686 | .byte 0x28, 0xE8, 0xE9, 0x29, 0xEB, 0x2B, 0x2A, 0xEA |
687 | .byte 0xEE, 0x2E, 0x2F, 0xEF, 0x2D, 0xED, 0xEC, 0x2C |
688 | .byte 0xE4, 0x24, 0x25, 0xE5, 0x27, 0xE7, 0xE6, 0x26 |
689 | .byte 0x22, 0xE2, 0xE3, 0x23, 0xE1, 0x21, 0x20, 0xE0 |
690 | .byte 0xA0, 0x60, 0x61, 0xA1, 0x63, 0xA3, 0xA2, 0x62 |
691 | .byte 0x66, 0xA6, 0xA7, 0x67, 0xA5, 0x65, 0x64, 0xA4 |
692 | .byte 0x6C, 0xAC, 0xAD, 0x6D, 0xAF, 0x6F, 0x6E, 0xAE |
693 | .byte 0xAA, 0x6A, 0x6B, 0xAB, 0x69, 0xA9, 0xA8, 0x68 |
694 | .byte 0x78, 0xB8, 0xB9, 0x79, 0xBB, 0x7B, 0x7A, 0xBA |
695 | .byte 0xBE, 0x7E, 0x7F, 0xBF, 0x7D, 0xBD, 0xBC, 0x7C |
696 | .byte 0xB4, 0x74, 0x75, 0xB5, 0x77, 0xB7, 0xB6, 0x76 |
697 | .byte 0x72, 0xB2, 0xB3, 0x73, 0xB1, 0x71, 0x70, 0xB0 |
698 | .byte 0x50, 0x90, 0x91, 0x51, 0x93, 0x53, 0x52, 0x92 |
699 | .byte 0x96, 0x56, 0x57, 0x97, 0x55, 0x95, 0x94, 0x54 |
700 | .byte 0x9C, 0x5C, 0x5D, 0x9D, 0x5F, 0x9F, 0x9E, 0x5E |
701 | .byte 0x5A, 0x9A, 0x9B, 0x5B, 0x99, 0x59, 0x58, 0x98 |
702 | .byte 0x88, 0x48, 0x49, 0x89, 0x4B, 0x8B, 0x8A, 0x4A |
703 | .byte 0x4E, 0x8E, 0x8F, 0x4F, 0x8D, 0x4D, 0x4C, 0x8C |
704 | .byte 0x44, 0x84, 0x85, 0x45, 0x87, 0x47, 0x46, 0x86 |
705 | .byte 0x82, 0x42, 0x43, 0x83, 0x41, 0x81, 0x80, 0x40 |
706 |