Reputation: 5874
I have an application where I am receiving a binary serial data stream, and I need to split this data stream up into separate variables of various lengths (uint16_t
and uint32_t
).
Right now, I'm doing the ultra-simple:
// Byte offset of the iTOW field inside the packet buffer.
#define OFFSET_iTOW 0
// Buffer holding the received packet bytes.
volatile uint8_t temp[128];
// Destination for the assembled 32-bit value.
volatile uint32_t recBytes;
// Repeatedly assembles the little-endian 32-bit field at OFFSET_iTOW into
// recBytes, one byte at a time.
void main()
{
while (1)
{
// Start with the most significant byte (highest offset), then shift it up
// and OR in the next lower byte. Because recBytes is volatile, each of the
// seven statements below is a separate access to the 32-bit variable.
recBytes = temp[OFFSET_iTOW+3];
recBytes <<= 8;
recBytes |= temp[OFFSET_iTOW+2];
recBytes <<= 8;
recBytes |= temp[OFFSET_iTOW+1];
recBytes <<= 8;
// Least significant byte (lowest offset) goes in last.
recBytes |= temp[OFFSET_iTOW+0];
}
}
(Data is sent little-endian. The OFFSET_iTOW
is one of about three dozen offsets (defined in a header file normally) for the various sections of a data packet)
However, this results in a rather enormous assembly output (slightly truncated to section of interest):
void main()
{
recBytes = 0;
12e: 10 92 04 02 sts 0x0204, r1
132: 10 92 05 02 sts 0x0205, r1
136: 10 92 06 02 sts 0x0206, r1
13a: 10 92 07 02 sts 0x0207, r1
while (1)
{
recBytes = temp[OFFSET_iTOW+3];
13e: 80 91 03 02 lds r24, 0x0203
142: 90 e0 ldi r25, 0x00 ; 0
144: a0 e0 ldi r26, 0x00 ; 0
146: b0 e0 ldi r27, 0x00 ; 0
148: 80 93 04 02 sts 0x0204, r24
14c: 90 93 05 02 sts 0x0205, r25
150: a0 93 06 02 sts 0x0206, r26
154: b0 93 07 02 sts 0x0207, r27
recBytes <<= 8;
158: 80 91 04 02 lds r24, 0x0204
15c: 90 91 05 02 lds r25, 0x0205
160: a0 91 06 02 lds r26, 0x0206
164: b0 91 07 02 lds r27, 0x0207
168: ba 2f mov r27, r26
16a: a9 2f mov r26, r25
16c: 98 2f mov r25, r24
16e: 88 27 eor r24, r24
170: 80 93 04 02 sts 0x0204, r24
174: 90 93 05 02 sts 0x0205, r25
178: a0 93 06 02 sts 0x0206, r26
17c: b0 93 07 02 sts 0x0207, r27
recBytes |= temp[OFFSET_iTOW+2];
180: 20 91 04 02 lds r18, 0x0204
184: 30 91 05 02 lds r19, 0x0205
188: 40 91 06 02 lds r20, 0x0206
18c: 50 91 07 02 lds r21, 0x0207
190: 80 91 02 02 lds r24, 0x0202
194: 90 e0 ldi r25, 0x00 ; 0
196: a0 e0 ldi r26, 0x00 ; 0
198: b0 e0 ldi r27, 0x00 ; 0
19a: 82 2b or r24, r18
19c: 93 2b or r25, r19
19e: a4 2b or r26, r20
1a0: b5 2b or r27, r21
1a2: 80 93 04 02 sts 0x0204, r24
1a6: 90 93 05 02 sts 0x0205, r25
1aa: a0 93 06 02 sts 0x0206, r26
1ae: b0 93 07 02 sts 0x0207, r27
recBytes <<= 8;
1b2: 80 91 04 02 lds r24, 0x0204
1b6: 90 91 05 02 lds r25, 0x0205
1ba: a0 91 06 02 lds r26, 0x0206
1be: b0 91 07 02 lds r27, 0x0207
1c2: ba 2f mov r27, r26
1c4: a9 2f mov r26, r25
1c6: 98 2f mov r25, r24
1c8: 88 27 eor r24, r24
1ca: 80 93 04 02 sts 0x0204, r24
1ce: 90 93 05 02 sts 0x0205, r25
1d2: a0 93 06 02 sts 0x0206, r26
1d6: b0 93 07 02 sts 0x0207, r27
recBytes |= temp[OFFSET_iTOW+1];
1da: 20 91 04 02 lds r18, 0x0204
1de: 30 91 05 02 lds r19, 0x0205
1e2: 40 91 06 02 lds r20, 0x0206
1e6: 50 91 07 02 lds r21, 0x0207
1ea: 80 91 01 02 lds r24, 0x0201
1ee: 90 e0 ldi r25, 0x00 ; 0
1f0: a0 e0 ldi r26, 0x00 ; 0
1f2: b0 e0 ldi r27, 0x00 ; 0
1f4: 82 2b or r24, r18
1f6: 93 2b or r25, r19
1f8: a4 2b or r26, r20
1fa: b5 2b or r27, r21
1fc: 80 93 04 02 sts 0x0204, r24
200: 90 93 05 02 sts 0x0205, r25
204: a0 93 06 02 sts 0x0206, r26
208: b0 93 07 02 sts 0x0207, r27
recBytes <<= 8;
20c: 80 91 04 02 lds r24, 0x0204
210: 90 91 05 02 lds r25, 0x0205
214: a0 91 06 02 lds r26, 0x0206
218: b0 91 07 02 lds r27, 0x0207
21c: ba 2f mov r27, r26
21e: a9 2f mov r26, r25
220: 98 2f mov r25, r24
222: 88 27 eor r24, r24
224: 80 93 04 02 sts 0x0204, r24
228: 90 93 05 02 sts 0x0205, r25
22c: a0 93 06 02 sts 0x0206, r26
230: b0 93 07 02 sts 0x0207, r27
recBytes |= temp[OFFSET_iTOW+0];
234: 20 91 04 02 lds r18, 0x0204
238: 30 91 05 02 lds r19, 0x0205
23c: 40 91 06 02 lds r20, 0x0206
240: 50 91 07 02 lds r21, 0x0207
244: 80 91 00 02 lds r24, 0x0200
248: 90 e0 ldi r25, 0x00 ; 0
24a: a0 e0 ldi r26, 0x00 ; 0
24c: b0 e0 ldi r27, 0x00 ; 0
24e: 82 2b or r24, r18
250: 93 2b or r25, r19
252: a4 2b or r26, r20
254: b5 2b or r27, r21
256: 80 93 04 02 sts 0x0204, r24
25a: 90 93 05 02 sts 0x0205, r25
25e: a0 93 06 02 sts 0x0206, r26
262: b0 93 07 02 sts 0x0207, r27
266: 6b cf rjmp .-298 ; 0x13e <loop+0x10>
This is part of a GPS data parser in the interrupt service routine living on an 8-bit microprocessor running at 16 MHz, and I need to do a lot of these conversions, so the result above is a bit excessive.
Since this is in an ISR, I can be confident that the various data will not change during the interrupt. Basically, I'd like to be able to address the individual bytes in the long. Since this is an 8-bit architecture, it seems like the compiler should be able to optimize down to just a few operations (maybe 3-4 per line of C, as the bytes in the long are directly addressable from an assembly perspective).
The variables are declared volatile
so they're not optimized away to a loop that does nothing. In the actual application, they're extern
ed structs that are written to from the ISR, but read from the idle loop (with the appropriate ISR guarding to prevent reads being interrupted). I'm not sure how to produce a compact snippet that demonstrates that exact behavior, though.
Upvotes: 2
Views: 1791
Reputation: 154315
If your temp
buffer can be filled in the same endian order as your processor, you can form a union of the 128 byte temp
and a 128/4-element value_u32 array
. No movement needed.
// Size of the receive buffer in bytes.
#define N (128)
// Overlay of the raw receive buffer with an array of 32-bit words, so a
// 32-bit field can be read in place without copying individual bytes.
// Only valid when the bytes in temp are already in the processor's own
// byte order.
union Data {
    uint8_t temp[N];
    uint32_t value_u32[N/sizeof(uint32_t)];
} recBytes;
// Read the 32-bit field directly through the word view. The member is
// value_u32 (not value), and the index is the byte offset divided by 4,
// so this only addresses fields whose offset is a multiple of 4.
recBytes.value_u32[OFFSET_iTOW/4];
[edit expanded to meet additional OP's concerns]
// Structured view of one received packet. The byte offsets noted below
// follow from the field sizes and assume the compiler inserts no padding
// (every field sits on a multiple of its own size).
typedef struct {
    uint32_t field1;    // bytes  0..3
    int32_t  field2;    // bytes  4..7
    int16_t  field3;    // bytes  8..9   } field3, field4 and field5
    uint8_t  field4;    // byte  10      } together fill exactly one
    uint8_t  field5;    // byte  11      } 4-byte slot
    int32_t  field6;    // bytes 12..15
    int32_t  field7;    // bytes 16..19
    int32_t  field8;    // bytes 20..23
    uint32_t field9;    // bytes 24..27
    int32_t  field10;   // bytes 28..31
    int32_t  field11;   // bytes 32..35
    int32_t  field12;   // bytes 36..39
    uint32_t field13;   // bytes 40..43
    uint16_t field14;   // bytes 44..45  } field14, field15 and field16
    uint8_t  field15;   // byte  46      } together fill exactly one
    uint8_t  field16;   // byte  47      } 4-byte slot
    uint32_t field17;   // bytes 48..51
} packet_t;

// The raw receive buffer and the structured packet share the same storage:
// bytes written into temp can be read back as named fields through Packet.
union Data {
    uint8_t  temp[128];
    packet_t Packet;
};

// The single shared receive object.
union Data recBytes;
The union
could consist of all the fields of the packet structure. After checksum verification, simply copy the structure, rather than field by field.
// Copy the whole packet out of the union in one struct assignment.
Working_var = recBytes.Packet; // or memcpy(&Working_var, &recBytes.Packet, sizeof(Working_var));
Note: your supplied packet defines 52 bytes.
Upvotes: 1
Reputation: 42564
Your compiler is generating a 32-bit store for every single input byte and shift instruction - it has to, since recBytes is volatile. Use a temporary to build the 32-bit value and then store it into the volatile, or just do it all in one assignment:
// Byte offset of the iTOW field inside the packet buffer.
#define OFFSET_iTOW 0
// Buffer holding the received packet bytes.
volatile uint8_t temp[128];
// Destination for the assembled 32-bit value.
volatile uint32_t recBytes;
// Repeatedly assembles the little-endian 32-bit field at OFFSET_iTOW and
// writes it to recBytes with a single assignment.
void main()
{
while (1)
{
// Each byte is cast to uint32_t before shifting so the shift is carried out
// in 32 bits rather than in the promoted int type. The four bytes are
// combined in one expression, so the volatile recBytes is written once
// instead of once per shift/OR step.
recBytes = (uint32_t)temp[OFFSET_iTOW+3] << 24 |
(uint32_t)temp[OFFSET_iTOW+2] << 16 |
(uint32_t)temp[OFFSET_iTOW+1] << 8 |
(uint32_t)temp[OFFSET_iTOW+0];
}
}
Upvotes: 2
Reputation: 44308
If you use a union, you can get to the individual bytes of the long.
// Four individually addressable bytes sharing storage with one 32-bit
// value: writing bytes[0..3] fills in value without any shifting.
union Data
{
    uint8_t  bytes[4];
    uint32_t value;
};

// Shared object used to assemble one 32-bit value from its bytes.
union Data recBytes;
then
// The packet data is little-endian (least significant byte at the lowest
// offset). On a little-endian target the union's bytes[0] is also the least
// significant byte of value, so the bytes are copied in the same order.
// On a big-endian target the indices on one side would have to be reversed.
recBytes.bytes[0] = temp[OFFSET_iTOW+0];
recBytes.bytes[1] = temp[OFFSET_iTOW+1];
recBytes.bytes[2] = temp[OFFSET_iTOW+2];
recBytes.bytes[3] = temp[OFFSET_iTOW+3];
then recBytes.value
will be what you want (though I'm not 100% sure about the byte ordering; you may have to reverse it).
Upvotes: 3