// Similar in concept to Lua's pack and unpack // Main differences are: // - Some formats are reinterpreted to mean the same thing across architectures // - Default endianness is big-endian (but that can easily be changed, of course) // - Formats are parsed once and used many times, to reduce overhead // - Polyfills will store packers and unpackers in weak tables // - Default alignment is no alignment // - The maximum size of an integer is 64 bits (instead of 128) #include #include #include #include #include #include "lib.h" #define FMT_UDATA_NAME "fmt.meta" typedef enum { FMT_INT8, FMT_UINT8, FMT_INT16, FMT_UINT16, FMT_INT32, FMT_UINT32, FMT_INT64, FMT_UINT64, FMT_FLOAT32, FMT_FLOAT64, FMT_STR_FIXED, FMT_STR8, FMT_STR16, FMT_STR32, FMT_STR64, FMT_STR_ZERO, FMT_PADDING, } fmt_type_t; typedef enum { FMT_OK, FMT_BAD_ARGS, FMT_INCOMPLETE_OP, FMT_BAD_OP_ARG, FMT_BAD_OP, } fmt_code_t; typedef struct { fmt_type_t type; size_t size, align; } fmt_segment_t; typedef struct { bool little_endian; size_t max_align; size_t segments_n; fmt_segment_t segments[]; } fmt_t; static int fmt_parse_width(size_t *i, const char *raw, size_t raw_size) { if (*i < raw_size) { switch (raw[(*i)++]) { case '1': return 1; case '2': return 2; case '4': return 4; case '8': return 8; default: (*i)--; return 8; } } else return 8; } static fmt_code_t fmt_parse_fmt(lua_State *ctx, const char *raw, size_t raw_size) { if (raw == NULL || *raw == 0) return FMT_BAD_ARGS; if (raw_size == -1) raw_size = strlen(raw); fmt_segment_t segments[raw_size]; size_t segments_n = 0; bool little_endian; bool max_align; size_t i = 0; bool curr_padding = false; while (true) { if (i >= raw_size) break; fmt_segment_t segment; switch (raw[i++]) { case '<': little_endian = true; continue; case '>': case '=': little_endian = false; continue; case '!': if (i < raw_size && (raw[i] < '0' || raw[i] > '9')) { max_align = raw[i] - '0'; i++; } else max_align = 1; continue; case 'b': segment = (fmt_segment_t){ .type = FMT_INT8, .align = 1 }; break; case 'B': segment = (fmt_segment_t){ .type = FMT_UINT8, .align = 1 }; break; case 'h': segment = (fmt_segment_t){ .type = FMT_INT16, .align = 2 }; break; case 'H': segment = (fmt_segment_t){ .type = FMT_UINT16, .align = 2 }; break; case 'j': segment = (fmt_segment_t){ .type = FMT_INT32, .align = 4 }; break; case 'J': segment = (fmt_segment_t){ .type = FMT_UINT32, .align = 4 }; break; case 'l': segment = (fmt_segment_t){ .type = FMT_INT64, .align = 8 }; break; case 'L': case 'T': segment = (fmt_segment_t){ .type = FMT_UINT64, .align = 8 }; break; case 'f': segment = (fmt_segment_t){ .type = FMT_FLOAT32, .align = 4 }; break; case 'd': case 'n': segment = (fmt_segment_t){ .type = FMT_FLOAT64, .align = 8 }; break; case 'i': switch (fmt_parse_width(&i, raw, raw_size)) { case 1: segment = (fmt_segment_t){ .type = FMT_INT8, .align = 1 }; break; case 2: segment = (fmt_segment_t){ .type = FMT_INT16, .align = 2 }; break; case 4: segment = (fmt_segment_t){ .type = FMT_INT32, .align = 4 }; break; case 8: segment = (fmt_segment_t){ .type = FMT_INT64, .align = 8 }; break; } break; case 'I': switch (fmt_parse_width(&i, raw, raw_size)) { case 1: segment = (fmt_segment_t){ .type = FMT_UINT8, .align = 1 }; break; case 2: segment = (fmt_segment_t){ .type = FMT_UINT16, .align = 2 }; break; case 4: segment = (fmt_segment_t){ .type = FMT_UINT32, .align = 4 }; break; case 8: segment = (fmt_segment_t){ .type = FMT_UINT64, .align = 8 }; break; } break; case 'z': segment = (fmt_segment_t){ .type = FMT_STR_ZERO, .align = 1 }; break; case 'c': bool has_size; size_t str_size; while (i >= raw_size && raw[i] >= '0' && raw[i] <= '9') { str_size *= 10; str_size += raw[i] - '0'; has_size = true; i++; } if (!has_size) return FMT_INCOMPLETE_OP; segment = (fmt_segment_t){ .type = FMT_STR_FIXED, .size = str_size, .align = 1 }; break; case 's': switch (fmt_parse_width(&i, raw, raw_size)) { case 1: segment = (fmt_segment_t){ .type = FMT_STR8, .align = 1 }; break; case 2: segment = (fmt_segment_t){ .type = FMT_STR16, .align = 2 }; break; case 4: segment = (fmt_segment_t){ .type = FMT_STR32, .align = 4 }; break; case 8: segment = (fmt_segment_t){ .type = FMT_STR64, .align = 8 }; break; } break; case 'x': segment = (fmt_segment_t){ .type = FMT_PADDING, .size = 1, .align = 1 }; break; case 'X': curr_padding = true; continue; case ' ': break; default: return FMT_BAD_OP; } if (curr_padding) { // TODO: is this correct? switch (segment.type) { case FMT_INT8: case FMT_UINT8: case FMT_STR8: segment = (fmt_segment_t){ .type = FMT_PADDING, .size = 1, .align = 1 }; break; case FMT_INT16: case FMT_UINT16: case FMT_STR16: segment = (fmt_segment_t){ .type = FMT_PADDING, .size = 2, .align = 2 }; break; case FMT_INT32: case FMT_UINT32: case FMT_FLOAT32: case FMT_STR32: segment = (fmt_segment_t){ .type = FMT_PADDING, .size = 4, .align = 4 }; break; case FMT_INT64: case FMT_UINT64: case FMT_FLOAT64: case FMT_STR64: segment = (fmt_segment_t){ .type = FMT_PADDING, .size = 8, .align = 8 }; break; case FMT_STR_FIXED: segment = (fmt_segment_t){ .type = FMT_PADDING, .size = segment.size, .align = segment.size }; break; default: return FMT_BAD_OP_ARG; } } segments[segments_n++] = segment; continue; } if (curr_padding) return FMT_INCOMPLETE_OP; for (size_t i = 0; i < segments_n; i++) { // TODO: this might not be correct... if (segments[i].align > max_align) segments[i].align = 8; if (segments[i].align > max_align) segments[i].align = 4; if (segments[i].align > max_align) segments[i].align = 2; if (segments[i].align > max_align) segments[i].align = 1; } fmt_t *res = lua_newuserdata(ctx, sizeof *res + sizeof *segments * segments_n); luaL_getmetatable(ctx, FMT_UDATA_NAME); lua_setmetatable(ctx, -2); res->little_endian = little_endian; res->max_align = max_align; res->segments_n = segments_n; memcpy(res->segments, segments, sizeof *segments * segments_n); return FMT_OK; } typedef struct { char *arr; size_t cap, n; } write_buff_t; static void write_buff_append(write_buff_t *buff, const char *data, size_t size) { size_t new_cap = buff->cap; while (new_cap < buff->n + size) { if (new_cap == 0) new_cap = 16; else new_cap *= 2; } if (new_cap != buff->cap) { if (buff->cap == 0) { buff->arr = malloc(new_cap); } else { buff->arr = realloc(buff->arr, new_cap); } buff->cap = new_cap; } if (data == NULL) { memset(buff->arr + buff->n, 0, size); } else { memcpy(buff->arr + buff->n, data, size); } buff->n += size; } static void write_buff_fit(write_buff_t *buff) { if (buff->arr == NULL) return; if (buff->n == 0) { free(buff->arr); buff->cap = 0; } else { buff->cap = buff->n; buff->arr = realloc(buff->arr, buff->n); } } static bool fmt_read_uint8(const uint8_t *raw, size_t *i, size_t size, uint8_t *res) { if ((*i) + 1 > size || *i < 0) return false; *res = raw[(*i)++]; return true; } static bool fmt_read_int8(const uint8_t *raw, size_t *i, size_t size, int8_t *res) { return fmt_read_uint8(raw, i, size, (uint8_t*)res); } static bool fmt_read_uint16(const uint8_t *raw, size_t *i, size_t size, uint16_t *res, bool little_endian) { if ((*i) + 2 > size || *i < 0) return false; uint16_t a = raw[(*i)++]; uint16_t b = raw[(*i)++]; if (little_endian) *res = a | b << 8; else *res = a << 8 | b; return true; } static bool fmt_read_int16(const uint8_t *raw, size_t *i, size_t size, int16_t *res, bool little_endian) { return fmt_read_uint16(raw, i, size, (uint16_t*)res, little_endian); } static bool fmt_read_uint32(const uint8_t *raw, size_t *i, size_t size, uint32_t *res, bool little_endian) { if ((*i) + 4 > size || *i < 0) return false; uint8_t a = raw[(*i)++]; uint8_t b = raw[(*i)++]; uint8_t c = raw[(*i)++]; uint8_t d = raw[(*i)++]; if (little_endian) *res = a | b << 8 | c << 16 | d << 24; else *res = a << 24 | b << 16 | c << 8 | d; return true; } static bool fmt_read_int32(const uint8_t *raw, size_t *i, size_t size, int32_t *res, bool little_endian) { return fmt_read_uint32(raw, i, size, (uint32_t*)res, little_endian); } static bool fmt_read_uint64(const uint8_t *raw, size_t *i, size_t size, uint64_t *res, bool little_endian) { if ((*i) + 8 > size || *i < 0) return false; uint64_t a = raw[(*i)++]; uint64_t b = raw[(*i)++]; uint64_t c = raw[(*i)++]; uint64_t d = raw[(*i)++]; uint64_t e = raw[(*i)++]; uint64_t f = raw[(*i)++]; uint64_t g = raw[(*i)++]; uint64_t h = raw[(*i)++]; if (little_endian) *res = a | b << 8 | c << 16 | d << 24 | e << 32 | f << 40 | g << 48 | h << 56; else *res = a << 56 | b << 48 | c << 40 | d << 32 | e << 24 | f << 16 | g << 8 | h; return true; } static bool fmt_read_int64(const uint8_t *raw, size_t *i, size_t size, int64_t *res, bool little_endian) { return fmt_read_uint64(raw, i, size, (uint64_t*)res, little_endian); } static bool fmt_read_float32(const uint8_t *raw, size_t *i, size_t size, float *res, bool little_endian) { if ((*i) + 4 > size || *i < 0) return false; uint8_t a = raw[(*i)++]; uint8_t b = raw[(*i)++]; uint8_t c = raw[(*i)++]; uint8_t d = raw[(*i)++]; // TODO: is this portable enough? if (little_endian) *res = *(float*)(uint8_t[]) { a, b, c, d }; else *res = *(float*)(uint8_t[]) { d, c, b, a }; return true; } static bool fmt_read_float64(const uint8_t *raw, size_t *i, size_t size, double *res, bool little_endian) { if ((*i) + 8 > size || *i < 0) return false; uint8_t a = raw[(*i)++]; uint8_t b = raw[(*i)++]; uint8_t c = raw[(*i)++]; uint8_t d = raw[(*i)++]; uint8_t e = raw[(*i)++]; uint8_t f = raw[(*i)++]; uint8_t g = raw[(*i)++]; uint8_t h = raw[(*i)++]; // TODO: is this portable enough? if (little_endian) *res = *(float*)(uint8_t[]) { a, b, c, d, e, f, g, h }; else *res = *(float*)(uint8_t[]) { h, g, f, e, d, c, b, a }; return true; } static bool fmt_read_string(lua_State *ctx, fmt_segment_t segment, const uint8_t *raw, size_t *i, size_t size, bool little_endian) { uint64_t len; switch (segment.type) { case FMT_STR8: { uint8_t len8; if (!fmt_read_uint8(raw, i, size, &len8)) return false; break; } case FMT_STR16: { uint16_t len16; if (!fmt_read_uint16(raw, i, size, &len16, little_endian)) return false; len = len16; break; } case FMT_STR32: { uint32_t len32; if (!fmt_read_uint32(raw, i, size, &len32, little_endian)) return false; len = len32; break; } case FMT_STR64: if (!fmt_read_uint64(raw, i, size, &len, little_endian)) return false; break; case FMT_STR_FIXED: len = segment.size; break; case FMT_STR_ZERO: len = strnlen((const char*)(raw + *i), size - *i); if (len >= size - *i) return false; break; default: return false; } fprintf(stderr, "%lu %lu", len, *i); if ((*i) + len > size) return false; if (*i < 0) return false; char data[len]; memcpy(data, raw + (*i), len); lua_pushlstring(ctx, data, len); return true; } static void fmt_write_uint8(write_buff_t *buff, uint8_t val) { write_buff_append(buff, (char[]){ val }, 1); } static void fmt_write_int8(write_buff_t *buff, int8_t val) { return fmt_write_uint8(buff, val); } static void fmt_write_uint16(write_buff_t *buff, uint16_t val, bool little_endian) { uint8_t a = val & 0xFF; uint8_t b = val >> 8 & 0xFF; if (little_endian) write_buff_append(buff, (char[]){ a, b }, 2); else write_buff_append(buff, (char[]){ b, a }, 2); } static void fmt_write_int16(write_buff_t *buff, uint16_t val, bool little_endian) { fmt_write_uint16(buff, val, little_endian); } static void fmt_write_uint32(write_buff_t *buff, uint32_t val, bool little_endian) { uint8_t a = val & 0xFF; uint8_t b = val >> 8 & 0xFF; uint8_t c = val >> 16 & 0xFF; uint8_t d = val >> 24 & 0xFF; if (little_endian) write_buff_append(buff, (char[]){ a, b, c, d }, 4); else write_buff_append(buff, (char[]){ d, c, b, a }, 4); } static void fmt_write_int32(write_buff_t *buff, uint32_t val, bool little_endian) { fmt_write_uint32(buff, val, little_endian); } static void fmt_write_uint64(write_buff_t *buff, uint64_t val, bool little_endian) { uint8_t a = val & 0xFF; uint8_t b = val >> 8 & 0xFF; uint8_t c = val >> 16 & 0xFF; uint8_t d = val >> 24 & 0xFF; uint8_t e = val >> 32 & 0xFF; uint8_t f = val >> 40 & 0xFF; uint8_t g = val >> 48 & 0xFF; uint8_t h = val >> 56 & 0xFF; if (little_endian) write_buff_append(buff, (char[]){ a, b, c, d, e, f, g, h }, 8); else write_buff_append(buff, (char[]){ h, g, f, e, d, c, b, a }, 8); } static void fmt_write_int64(write_buff_t *buff, uint64_t val, bool little_endian) { fmt_write_uint64(buff, val, little_endian); } static void fmt_write_float32(write_buff_t *buff, float val, bool little_endian) { uint32_t ival = *(uint32_t*)&val; uint8_t a = ival & 0xFF; uint8_t b = ival >> 8 & 0xFF; uint8_t c = ival >> 16 & 0xFF; uint8_t d = ival >> 24 & 0xFF; if (little_endian) write_buff_append(buff, (char[]){ a, b, c, d }, 4); else write_buff_append(buff, (char[]){ d, c, b, a }, 4); } static void fmt_write_float64(write_buff_t *buff, double val, bool little_endian) { uint64_t ival = *(uint64_t*)&val; uint8_t a = ival & 0xFF; uint8_t b = ival >> 8 & 0xFF; uint8_t c = ival >> 16 & 0xFF; uint8_t d = ival >> 24 & 0xFF; uint8_t e = ival >> 32 & 0xFF; uint8_t f = ival >> 40 & 0xFF; uint8_t g = ival >> 48 & 0xFF; uint8_t h = ival >> 56 & 0xFF; if (little_endian) write_buff_append(buff, (char[]){ a, b, c, d, e, f, g, h }, 8); else write_buff_append(buff, (char[]){ h, g, f, e, d, c, b, a }, 8); } static bool fmt_write_string(write_buff_t *buff, fmt_segment_t segment, const char *str, size_t len, bool little_endian) { switch (segment.type) { case FMT_STR8: if (len > 256) return false; fmt_write_uint8(buff, (uint8_t)len); write_buff_append(buff, str, len); return true; case FMT_STR16: if (len > 0x10000) return false; fmt_write_uint16(buff, (uint16_t)len, little_endian); write_buff_append(buff, str, len); return true; case FMT_STR32: if (len > 0x100000000) return false; fmt_write_uint32(buff, (uint32_t)len, little_endian); write_buff_append(buff, str, len); return true; case FMT_STR64: fmt_write_uint64(buff, len, little_endian); write_buff_append(buff, str, len); return true; case FMT_STR_FIXED: if (len > segment.size) return false; write_buff_append(buff, str, len); write_buff_append(buff, NULL, segment.size - len); return true; case FMT_STR_ZERO: if (strlen(str) != len) return false; write_buff_append(buff, str, len); write_buff_append(buff, (char[]) { 0x00 }, 1); return true; default: return false; } } static int lib_fmt_pack(lua_State *ctx) { fmt_t *fmt = luaL_checkudata(ctx, 1, FMT_UDATA_NAME); size_t arg_i = 2; write_buff_t buff = { .arr = NULL, .n = 0, .cap = 0 }; for (size_t i = 0; i < fmt->segments_n; i++) { fmt_segment_t segment = fmt->segments[i]; size_t n_aligned = -((-buff.n / segment.align) * segment.align); size_t align_padding = n_aligned - buff.n; write_buff_append(&buff, NULL, align_padding); switch (segment.type) { case FMT_INT8: fmt_write_int8(&buff, luaL_checkinteger(ctx, arg_i++)); break; case FMT_UINT8: fmt_write_uint8(&buff, luaL_checkinteger(ctx, arg_i++)); break; case FMT_INT16: fmt_write_int16(&buff, luaL_checkinteger(ctx, arg_i++), fmt->little_endian); break; case FMT_UINT16: fmt_write_uint16(&buff, luaL_checkinteger(ctx, arg_i++), fmt->little_endian); break; case FMT_INT32: fmt_write_int32(&buff, luaL_checkinteger(ctx, arg_i++), fmt->little_endian); break; case FMT_UINT32: fmt_write_uint32(&buff, luaL_checkinteger(ctx, arg_i++), fmt->little_endian); break; case FMT_INT64: fmt_write_int64(&buff, luaL_checkinteger(ctx, arg_i++), fmt->little_endian); break; case FMT_UINT64: fmt_write_uint64(&buff, luaL_checkinteger(ctx, arg_i++), fmt->little_endian); break; case FMT_FLOAT32: fmt_write_float32(&buff, (float)luaL_checknumber(ctx, arg_i++), fmt->little_endian); break; case FMT_FLOAT64: fmt_write_float64(&buff, (double)luaL_checknumber(ctx, arg_i++), fmt->little_endian); break; case FMT_STR_FIXED: case FMT_STR8: case FMT_STR16: case FMT_STR32: case FMT_STR64: case FMT_STR_ZERO: { size_t size; const char *str = luaL_checklstring(ctx, arg_i++, &size); if (!fmt_write_string(&buff, segment, str, size, fmt->little_endian)) { luaL_error(ctx, "invalid string at %d", arg_i); } break; } case FMT_PADDING: // TODO: might need to remove this later... write_buff_append(&buff, NULL, segment.size); } } write_buff_fit(&buff); lua_pushlstring(ctx, buff.arr, buff.n); free(buff.arr); return 1; } static int lib_fmt_unpack(lua_State *ctx) { fmt_t *fmt = luaL_checkudata(ctx, 1, FMT_UDATA_NAME); size_t raw_size; const uint8_t *raw = (const uint8_t*)luaL_checklstring(ctx, 2, &raw_size); size_t read_i = 0; size_t res_n = 0; if (lua_isinteger(ctx, 3)) { read_i = lua_tointeger(ctx, 3) - 1; } write_buff_t buff = { .arr = NULL, .n = 0, .cap = 0 }; for (size_t i = 0; i < fmt->segments_n; i++) { fmt_segment_t segment = fmt->segments[i]; size_t n_aligned = -((-buff.n / segment.align) * segment.align); size_t align_padding = n_aligned - buff.n; read_i += align_padding; switch (segment.type) { case FMT_INT8: { int8_t res; if (!fmt_read_int8(raw, &read_i, raw_size, &res)) luaL_error(ctx, "couldn't read int8"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_UINT8: { uint8_t res; if (!fmt_read_uint8(raw, &read_i, raw_size, &res)) luaL_error(ctx, "couldn't read uint8"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_INT16: { int16_t res; if (!fmt_read_int16(raw, &read_i, raw_size, &res, fmt->little_endian)) luaL_error(ctx, "couldn't read int16"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_UINT16: { uint16_t res; if (!fmt_read_uint16(raw, &read_i, raw_size, &res, fmt->little_endian)) luaL_error(ctx, "couldn't read uint16"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_INT32: { int32_t res; if (!fmt_read_int32(raw, &read_i, raw_size, &res, fmt->little_endian)) luaL_error(ctx, "couldn't read int32"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_UINT32: { uint32_t res; if (!fmt_read_uint32(raw, &read_i, raw_size, &res, fmt->little_endian)) luaL_error(ctx, "couldn't read uint32"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_INT64: { int64_t res; if (!fmt_read_int64(raw, &read_i, raw_size, &res, fmt->little_endian)) luaL_error(ctx, "couldn't read int64"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_UINT64: { uint64_t res; if (!fmt_read_uint64(raw, &read_i, raw_size, &res, fmt->little_endian)) luaL_error(ctx, "couldn't read uint64"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_FLOAT32: { float res; if (!fmt_read_float32(raw, &read_i, raw_size, &res, fmt->little_endian)) luaL_error(ctx, "couldn't read float32"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_FLOAT64: { double res; if (!fmt_read_float64(raw, &read_i, raw_size, &res, fmt->little_endian)) luaL_error(ctx, "couldn't read float64"); lua_pushinteger(ctx, res); res_n++; break; } case FMT_STR_FIXED: case FMT_STR8: case FMT_STR16: case FMT_STR32: case FMT_STR64: case FMT_STR_ZERO: { if (!fmt_read_string(ctx, segment, raw, &read_i, raw_size, fmt->little_endian)) luaL_error(ctx, "couldn't read string"); res_n++; break; } case FMT_PADDING: read_i += segment.size; break; } } lua_pushinteger(ctx, read_i + 1); return res_n + 1; } static int lib_fmt_new(lua_State *ctx) { size_t raw_size; const char *raw = luaL_checklstring(ctx, 1, &raw_size); fmt_code_t code = fmt_parse_fmt(ctx, raw, raw_size); if (code != FMT_OK) { switch (code) { case FMT_BAD_ARGS: return luaL_error(ctx, "illegal C arguments"); case FMT_INCOMPLETE_OP: return luaL_error(ctx, "incomplete operand in format string"); case FMT_BAD_OP_ARG: return luaL_error(ctx, "bad operand argument in format string"); case FMT_BAD_OP: return luaL_error(ctx, "bad operand in format string"); default: return luaL_error(ctx, "unknown error while parsing format string"); } } return 1; } static void fmt_init_meta(lua_State *ctx) { luaL_newmetatable(ctx, FMT_UDATA_NAME); luaL_newlib(ctx, ((luaL_Reg[]) { { "pack", lib_fmt_pack }, { "unpack", lib_fmt_unpack }, { NULL, NULL } })); lua_setfield(ctx, -2, "__index"); lua_pushboolean(ctx, false); lua_setfield(ctx, -2, "__meta"); lua_pop(ctx, 1); } int fmt_open_lib(lua_State *ctx) { fmt_init_meta(ctx); luaL_newlib(ctx, ((luaL_Reg[]) { { "new", lib_fmt_new }, { "pack", lib_fmt_pack }, { "unpack", lib_fmt_unpack }, { NULL, NULL }, })); return 1; }