From 55c3b29f7b20f3801848e0ab71e9de1d22207b95 Mon Sep 17 00:00:00 2001
From: Mike Pall
Date: Sat, 29 Aug 2015 23:58:28 +0200
Subject: [PATCH] Parse Unicode string escape \u{XX...}.

Thanks to drbo.
---
 doc/changes.html    |  1 +
 doc/extensions.html |  7 +++++++
 src/lj_lex.c        | 27 +++++++++++++++++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/doc/changes.html b/doc/changes.html
index febb03c7..125b58b4 100644
--- a/doc/changes.html
+++ b/doc/changes.html
@@ -86,6 +86,7 @@ Please take a look at the commit history for more details.
  • Add LJ_GC64 mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.
  • Add LJ_FR2 mode: Two-slot frame info. Required by LJ_GC64 mode.
  • Add table.new() and table.clear().
  • +Parse Unicode escape '\u{XX...}' in string literals.
  • Parse binary number literals (0bxxx).
  • Improvements to the JIT compiler:

diff --git a/doc/extensions.html b/doc/extensions.html
index 84ca5ce4..e034e1dc 100644
--- a/doc/extensions.html
+++ b/doc/extensions.html
@@ -344,6 +344,13 @@
    Lua 5.1, which prevents implementing features that would otherwise break the Lua/C API and ABI (e.g. _ENV).

    +Extensions from Lua 5.3

    +

    +LuaJIT supports some extensions from Lua 5.3:
    +  • Unicode escape '\u{XX...}' embeds the UTF-8 encoding in string literals.

    +

    C++ Exception Interoperability

    LuaJIT has built-in support for interoperating with C++ exceptions.

diff --git a/src/lj_lex.c b/src/lj_lex.c
index 8409cd78..5a918f74 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -214,6 +214,33 @@ static void lex_string(LexState *ls, TValue *tv)
             c += 9;
           }
           break;
+        case 'u': /* Unicode escape '\u{XX...}'. */
+          if (lex_next(ls) != '{') goto err_xesc;
+          lex_next(ls);
+          c = 0;
+          do {
+            c = (c << 4) | (ls->c & 15u);
+            if (!lj_char_isdigit(ls->c)) {
+              if (!lj_char_isxdigit(ls->c)) goto err_xesc;
+              c += 9;
+            }
+            if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */
+          } while (lex_next(ls) != '}');
+          if (c < 0x800) {
+            if (c < 0x80) break;
+            lex_save(ls, 0xc0 | (c >> 6));
+          } else {
+            if (c >= 0x10000) {
+              lex_save(ls, 0xf0 | (c >> 18));
+              lex_save(ls, 0x80 | ((c >> 12) & 0x3f));
+            } else {
+              if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */
+              lex_save(ls, 0xe0 | (c >> 12));
+            }
+            lex_save(ls, 0x80 | ((c >> 6) & 0x3f));
+          }
+          c = 0x80 | (c & 0x3f);
+          break;
         case 'z': /* Skip whitespace. */
           lex_next(ls);
           while (lj_char_isspace(ls->c))