Skip to content

Commit 15330cb

Browse files
authored
Merge pull request jgarff#468 from mcaralp/master
Optimization of ws2811_render function using precomputed tables
2 parents 76c5eca + 23ba92f commit 15330cb

File tree

1 file changed: 84 additions (+84) and 63 deletions (-63).

ws2811.c

Lines changed: 84 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -72,14 +72,6 @@
7272
RPI_PWM_CHANNELS)
7373
#define PCM_BYTE_COUNT(leds, freq) ((((LED_BIT_COUNT(leds, freq) >> 3) & ~0x7) + 4) + 4)
7474

75-
// Symbol definitions
76-
#define SYMBOL_HIGH 0x6 // 1 1 0
77-
#define SYMBOL_LOW 0x4 // 1 0 0
78-
79-
// Symbol definitions for software inversion (PCM and SPI only)
80-
#define SYMBOL_HIGH_INV 0x1 // 0 0 1
81-
#define SYMBOL_LOW_INV 0x3 // 0 1 1
82-
8375
// Driver mode definitions
8476
#define NONE 0
8577
#define PWM 1
@@ -1137,17 +1129,84 @@ ws2811_return_t ws2811_wait(ws2811_t *ws2811)
11371129
*/
11381130
ws2811_return_t ws2811_render(ws2811_t *ws2811)
11391131
{
1132+
static uint8_t convert_table[3][256] =
1133+
{
1134+
{
1135+
0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92,
1136+
0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92,
1137+
0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x92, 0x93, 0x93, 0x93,
1138+
0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93,
1139+
0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93, 0x93,
1140+
0x93, 0x93, 0x93, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A,
1141+
0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A,
1142+
0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9A, 0x9B, 0x9B, 0x9B, 0x9B,
1143+
0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B,
1144+
0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B, 0x9B,
1145+
0x9B, 0x9B, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2,
1146+
0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2,
1147+
0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD2, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3,
1148+
0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3,
1149+
0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3, 0xD3,
1150+
0xD3, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
1151+
0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
1152+
0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB,
1153+
0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB,
1154+
0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB, 0xDB
1155+
},
1156+
{
1157+
0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x4D,
1158+
0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69,
1159+
0x69, 0x69, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x49, 0x49, 0x49,
1160+
0x49, 0x49, 0x49, 0x49, 0x49, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D,
1161+
0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D,
1162+
0x6D, 0x6D, 0x6D, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x4D, 0x4D,
1163+
0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69,
1164+
0x69, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x49, 0x49, 0x49, 0x49,
1165+
0x49, 0x49, 0x49, 0x49, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x69,
1166+
0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D,
1167+
0x6D, 0x6D, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x4D, 0x4D, 0x4D,
1168+
0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69,
1169+
0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x49, 0x49, 0x49, 0x49, 0x49,
1170+
0x49, 0x49, 0x49, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x69, 0x69,
1171+
0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D,
1172+
0x6D, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49, 0x4D, 0x4D, 0x4D, 0x4D,
1173+
0x4D, 0x4D, 0x4D, 0x4D, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x6D,
1174+
0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x49, 0x49, 0x49, 0x49, 0x49, 0x49,
1175+
0x49, 0x49, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x4D, 0x69, 0x69, 0x69,
1176+
0x69, 0x69, 0x69, 0x69, 0x69, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D, 0x6D
1177+
},
1178+
{
1179+
0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24,
1180+
0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6,
1181+
0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34,
1182+
0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6,
1183+
0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4,
1184+
0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26,
1185+
0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4,
1186+
0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36,
1187+
0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24,
1188+
0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6,
1189+
0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34,
1190+
0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6,
1191+
0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4,
1192+
0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26,
1193+
0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4,
1194+
0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36,
1195+
0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24,
1196+
0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6,
1197+
0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34,
1198+
0x36, 0xA4, 0xA6, 0xB4, 0xB6, 0x24, 0x26, 0x34, 0x36, 0xA4, 0xA6, 0xB4, 0xB6
1199+
}
1200+
};
1201+
11401202
volatile uint8_t *pxl_raw = ws2811->device->pxl_raw;
11411203
int driver_mode = ws2811->device->driver_mode;
1142-
int bitpos;
1143-
int i, k, l, chan;
1204+
int i, l, chan;
11441205
unsigned j;
11451206
ws2811_return_t ret = WS2811_SUCCESS;
11461207
uint32_t protocol_time = 0;
11471208
static uint64_t previous_timestamp = 0;
11481209

1149-
bitpos = (driver_mode == SPI ? 7 : 31);
1150-
11511210
for (chan = 0; chan < RPI_PWM_CHANNELS; chan++) // Channel
11521211
{
11531212
ws2811_channel_t *channel = &ws2811->channel[chan];
@@ -1184,57 +1243,19 @@ ws2811_return_t ws2811_render(ws2811_t *ws2811)
11841243

11851244
for (j = 0; j < array_size; j++) // Color
11861245
{
1187-
for (k = 7; k >= 0; k--) // Bit
1188-
{
1189-
// Inversion is handled by hardware for PWM, otherwise by software here
1190-
uint8_t symbol = SYMBOL_LOW;
1191-
if ((driver_mode != PWM) && channel->invert) symbol = SYMBOL_LOW_INV;
1192-
1193-
if (color[j] & (1 << k))
1194-
{
1195-
symbol = SYMBOL_HIGH;
1196-
if ((driver_mode != PWM) && channel->invert) symbol = SYMBOL_HIGH_INV;
1197-
}
1198-
1199-
for (l = 2; l >= 0; l--) // Symbol
1200-
{
1201-
uint32_t *wordptr = &((uint32_t *)pxl_raw)[wordpos]; // PWM & PCM
1202-
volatile uint8_t *byteptr = &pxl_raw[bytepos]; // SPI
1203-
1204-
if (driver_mode == SPI)
1205-
{
1206-
*byteptr &= ~(1 << bitpos);
1207-
if (symbol & (1 << l))
1208-
{
1209-
*byteptr |= (1 << bitpos);
1210-
}
1211-
}
1212-
else // PWM & PCM
1213-
{
1214-
*wordptr &= ~(1 << bitpos);
1215-
if (symbol & (1 << l))
1216-
{
1217-
*wordptr |= (1 << bitpos);
1218-
}
1219-
}
1220-
1221-
bitpos--;
1222-
if (bitpos < 0)
1223-
{
1224-
if (driver_mode == SPI)
1225-
{
1226-
bytepos++;
1227-
bitpos = 7;
1228-
}
1229-
else // PWM & PCM
1230-
{
1231-
// Every other word is on the same channel for PWM
1232-
wordpos += (driver_mode == PWM ? 2 : 1);
1233-
bitpos = 31;
1234-
}
1235-
}
1236-
}
1237-
}
1246+
for(l = 0; l < 3; ++l)
1247+
{
1248+
uint8_t pos = driver_mode == SPI ? bytepos : 3 - bytepos;
1249+
uint8_t val = convert_table[l][color[j]];
1250+
if ((driver_mode != PWM) && channel->invert) val = ~val;
1251+
1252+
pxl_raw[wordpos * 4 + pos] = val;
1253+
if(++bytepos == 4)
1254+
{
1255+
bytepos = 0;
1256+
wordpos += driver_mode == PWM ? 2 : 1;
1257+
}
1258+
}
12381259
}
12391260
}
12401261
}

0 commit comments

Comments
 (0)