diff --git a/doc/changes.html b/doc/changes.html index 2107193a..04e26e40 100644 --- a/doc/changes.html +++ b/doc/changes.html @@ -36,8 +36,6 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
  • ffi.* API
  • -64 bit Integers -
  • FFI Semantics
  • diff --git a/doc/contact.html b/doc/contact.html index a979cb2f..a85c4884 100644 --- a/doc/contact.html +++ b/doc/contact.html @@ -33,8 +33,6 @@
  • ffi.* API
  • -64 bit Integers -
  • FFI Semantics
  • diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html index 119c89f5..35e2234e 100644 --- a/doc/ext_c_api.html +++ b/doc/ext_c_api.html @@ -33,8 +33,6 @@
  • ffi.* API
  • -64 bit Integers -
  • FFI Semantics
  • diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html index 50a44052..1fd276dc 100644 --- a/doc/ext_ffi.html +++ b/doc/ext_ffi.html @@ -33,8 +33,6 @@
  • ffi.* API
  • -64 bit Integers -
  • FFI Semantics
  • @@ -86,22 +84,30 @@ Please use the FFI sub-topics in the navigation bar to learn more. It's really easy to call an external C library function:

    -local ffi = require("ffi")
    -ffi.cdef[[
    -int printf(const char *fmt, ...);
    +local ffi = require("ffi") --
    +ffi.cdef[[ //
    +int printf(const char *fmt, ...);
     ]]
    -ffi.C.printf("Hello %s!", "world")
    +ffi.C.printf("Hello %s!", "world") --
     

    -So, let's pick that apart: the first line (in blue) loads the FFI -library. The next one adds a C declaration for the function. The -part between the double-brackets (in green) is just standard -C syntax. And the last line calls the named C function. Yes, -it's that simple! +So, let's pick that apart: +

    +

    + Load the FFI library. +

    +

    + Add a C declaration +for the function. The part inside the double-brackets (in green) is +just standard C syntax. +

    +

    + Call the named +C function — Yes, it's that simple!

    -Actually, what goes on behind the scenes is far from simple: the first -part of the last line (in orange) makes use of the standard +Actually, what goes on behind the scenes is far from simple: makes use of the standard C library namespace ffi.C. Indexing this namespace with a symbol name ("printf") automatically binds it to the standard C library. The result is a special kind of object which, @@ -120,7 +126,7 @@ So here's something to pop up a message box on Windows:

     local ffi = require("ffi")
     ffi.cdef[[
    -int MessageBoxA(void *w, const char *txt, const char *cap, int type);
    +int MessageBoxA(void *w, const char *txt, const char *cap, int type);
     ]]
     ffi.C.MessageBoxA(nil, "Hello world!", "Test", 0)
     
    @@ -193,24 +199,24 @@ And here's the FFI version. The modified parts have been marked in bold:

    -local ffi = require("ffi")
    -ffi.cdef[[
    -typedef struct { uint8_t red, green, blue, alpha; } rgba_pixel;
    +local ffi = require("ffi") --
    +ffi.cdef[[
    +typedef struct { uint8_t red, green, blue, alpha; } rgba_pixel;
     ]]
     
     local function image_ramp_green(n)
    -  local img = ffi.new("rgba_pixel[?]", n)
    +  local img = ffi.new("rgba_pixel[?]", n) --
       local f = 255/(n-1)
    -  for i=0,n-1 do
    -    img[i].green = i*f
    +  for i=0,n-1 do --
    +    img[i].green = i*f --
         img[i].alpha = 255
       end
       return img
     end
     
     local function image_to_grey(img, n)
    -  for i=0,n-1 do
    -    local y = 0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue
    +  for i=0,n-1 do --
    +    local y = 0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue --
         img[i].red = y; img[i].green = y; img[i].blue = y
       end
     end
    @@ -222,25 +228,37 @@ for i=1,1000 do
     end
     

    -Ok, so that wasn't too difficult: first, load the FFI library and -declare the low-level data type. Here we choose a struct -which holds four byte fields, one for each component of a 4x8 bit -RGBA pixel. +Ok, so that wasn't too difficult:

    -Creating the data structure with ffi.new() is straightforward -— the '?' is a placeholder for the number of elements -of a variable-length array. C arrays are zero-based, so the -indexes have to run from 0 to n-1 (one might -allocate one more element instead to simplify converting legacy -code). Since ffi.new() zero-fills the array by default, we -only need to set the green and the alpha fields. + First, load the FFI +library and declare the low-level data type. Here we choose a +struct which holds four byte fields, one for each component +of a 4x8 bit RGBA pixel.

    -The calls to math.floor() can be omitted here, because -floating-point numbers are already truncated towards zero when -converting them to an integer. This happens implicitly when the number -is stored in the fields of each pixel. + Creating the data +structure with ffi.new() is straightforward — the +'?' is a placeholder for the number of elements of a +variable-length array. +

    +

    + C arrays are +zero-based, so the indexes have to run from 0 to +n-1. One might want to allocate one more element instead to +simplify converting legacy code. +

    +

    + Since ffi.new() +zero-fills the array by default, we only need to set the green and the +alpha fields. +

    +

    + The calls to +math.floor() can be omitted here, because floating-point +numbers are already truncated towards zero when converting them to an +integer. This happens implicitly when the number is stored in the +fields of each pixel.

    Now let's have a look at the impact of the changes: first, memory diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html index 7c2e53dd..9bedd52e 100644 --- a/doc/ext_ffi_api.html +++ b/doc/ext_ffi_api.html @@ -38,8 +38,6 @@ td.abiparam { font-weight: bold; width: 6em; }

  • ffi.* API
  • -64 bit Integers -
  • FFI Semantics
  • @@ -106,7 +104,7 @@ follows:

     ffi.cdef[[
    -typedef struct foo { int a, b; } foo_t;  // Declare a struct and typedef.
    +typedef struct foo { int a, b; } foo_t;  // Declare a struct and typedef.
     int dofoo(foo_t *f, int n);  /* Declare an external C function. */
     ]]
     
    @@ -237,12 +235,8 @@ rules.

    This function is mainly useful to override the pointer compatibility -rules or to convert pointers to addresses or vice versa. For maximum -portability you should convert a pointer to its address as follows: +checks or to convert pointers to addresses or vice versa.

    -
    -local addr = tonumber(ffi.cast("intptr_t", ptr))
    -

    C Type Information

    @@ -383,6 +377,45 @@ Contains the target OS name. Same contents as Contains the target architecture name. Same contents as jit.arch.

    + +

    Extended Standard Library Functions

    +

    +The following standard library functions have been extended to work +with cdata objects: +

    + +

    n = tonumber(cdata)

    +

    +Converts a number cdata object to a double and returns it as +a Lua number. This is particularly useful for boxed 64 bit +integer values. Caveat: this conversion may incur a precision loss. +

    + +

    s = tostring(cdata)

    +

    +Returns a string representation of the value of 64 bit integers +("nnnLL" or "nnnULL") or +complex numbers ("re±imi"). Otherwise +returns a string representation of the C type of a ctype object +("ctype<type>") or a cdata object +("cdata<type>: address"). +

    + +

    Extensions to the Lua Parser

    +

    +The parser for Lua source code treats numeric literals with the +suffixes LL or ULL as signed or unsigned 64 bit +integers. Case doesn't matter, but uppercase is recommended for +readability. It handles both decimal (42LL) and hexadecimal +(0x2aLL) literals. +

    +

    +The imaginary part of complex numbers can be specified by suffixing +number literals with i or I, e.g. 12.5i. +Caveat: you'll need to use 1i to get an imaginary part with +the value one, since i itself still refers to a variable +named i. +


  • ffi.* API
  • -64 bit Integers -
  • FFI Semantics
  • @@ -653,7 +651,10 @@ parameters given by the function declaration. Arguments passed to the variable argument part of vararg C function use special conversion rules. This C function is called and the return value (if any) is -converted to a Lua object.
  • +converted to a Lua object.
    +On Windows/x86 systems, stdcall functions are automatically +detected and a function declared as cdecl (the default) is +silently fixed up after the first call. @@ -672,15 +673,24 @@ can be subtracted. The result is the difference between their addresses, divided by the element size in bytes. An error is raised if the element size is undefined or zero. -
  • 64 bit integer arithmetic: -the standard arithmetic operators -(+ - * / % ^ and unary -) -can be applied to two cdata numbers, or a cdata number and a Lua -number. If one of them is an uint64_t, the other side is +
  • 64 bit integer arithmetic: the standard arithmetic +operators (+ - * / % ^ and unary +minus) can be applied to two cdata numbers, or a cdata number and a +Lua number. If one of them is an uint64_t, the other side is converted to an uint64_t and an unsigned arithmetic operation is performed. Otherwise both sides are converted to an int64_t and a signed arithmetic operation is performed. The -result is a boxed 64 bit cdata object.
  • +result is a boxed 64 bit cdata object.
    + +These rules ensure that 64 bit integers are "sticky". Any +expression involving at least one 64 bit integer operand results +in another one. The undefined cases for the division, modulo and power +operators return 2LL ^ 63 or +2ULL ^ 63.
    + +You'll have to explicitly convert a 64 bit integer to a Lua +number (e.g. for regular floating-point calculations) with +tonumber(). But note this may incur a precision loss. @@ -692,12 +702,12 @@ can be compared. The result is the same as an unsigned comparison of their addresses. nil is treated like a NULL pointer, which is compatible with any other pointer type. -
  • 64 bit integer comparison: -two cdata numbers, or a cdata number and a Lua number can be compared -with each other. If one of them is an uint64_t, the other -side is converted to an uint64_t and an unsigned comparison -is performed. Otherwise both sides are converted to an -int64_t and a signed comparison is performed.
  • +
  • 64 bit integer comparison: two cdata numbers, or a +cdata number and a Lua number can be compared with each other. If one +of them is an uint64_t, the other side is converted to an +uint64_t and an unsigned comparison is performed. Otherwise +both sides are converted to an int64_t and a signed +comparison is performed.
  • diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html index 11e83339..c43b223b 100644 --- a/doc/ext_ffi_tutorial.html +++ b/doc/ext_ffi_tutorial.html @@ -8,6 +8,13 @@ +
    @@ -33,8 +40,6 @@
  • ffi.* API
  • -64 bit Integers -
  • FFI Semantics
  • @@ -57,7 +62,14 @@
  • -TODO +This page is intended to give you an overview of the features of the FFI +library by presenting a few use cases and guidelines. +

    +

    +This page makes no attempt to explain all of the FFI library, though. +You'll want to have a look at the ffi.* API +function reference and the FFI +semantics to learn more.

    Loading the FFI Library

    @@ -76,7 +88,339 @@ of globals — you really need to use the local variable. The require function ensures the library is only loaded once.

    -

    TODO

    +

    Accessing Standard System Functions

    +

    +The following code explains how to access standard system functions. +We slowly print two lines of dots by sleeping for 10 milliseconds +after each dot: +

    +
    +local ffi = require("ffi")
    +ffi.cdef[[ //
    +void Sleep(int ms);
    +int poll(struct pollfd *fds, unsigned long nfds, int timeout);
    +]]
    +
    +local sleep
    +if ffi.os == "Windows" then --
    +  function sleep(s) --
    +    ffi.C.Sleep(s*1000) --
    +  end
    +else
    +  function sleep(s)
    +    ffi.C.poll(nil, 0, s*1000) --
    +  end
    +end
    +
    +for i=1,160 do
    +  io.write("."); io.flush()
    +  sleep(0.01) --
    +end
    +io.write("\n")
    +
    +

    +Here's the step-by-step explanation: +

    +

    + This defines the +C library functions we're going to use. The part inside the +double-brackets (in green) is just standard C syntax. You can +usually get this info from the C header files or the +documentation provided by each C library or C compiler. +

    +

    + The difficulty we're +facing here is that there are different standards to choose from. +Windows has a simple Sleep() function. On other systems there +are a variety of functions available to achieve sub-second sleeps, but +with no clear consensus. Thankfully poll() can be used for +this task, too, and it's present on most non-Windows systems. The +check for ffi.os makes sure we use the Windows-specific +function only on Windows systems. +

    +

    + Here we're wrapping the +call to the C function in a Lua function. This isn't strictly +necessary, but it's helpful to deal with system-specific issues only +in one part of the code. The way we're wrapping it ensures the check +for the OS is only done during initialization and not for every call. +

    +

    + A more subtle point is +that we defined our sleep() function (for the sake of this +example) as taking the number of seconds, but accepting fractional +seconds. Multiplying this by 1000 gets us milliseconds, but that still +leaves it a Lua number, which is a floating-point value. Alas, the +Sleep() function only accepts an integer value. Luckily for +us, the FFI library automatically performs the conversion when calling +the function (truncating the FP value towards zero, like in C). +

    +

    +Some readers will notice that Sleep() is part of +KERNEL32.DLL and is also a stdcall function. So how +can this possibly work? The FFI library provides the ffi.C +default C library namespace, which allows calling functions from +the default set of libraries, like a C compiler would. Also, the +FFI library automatically detects stdcall functions, so you +don't need to declare them as such. +

    +

    + The poll() +function takes a couple more arguments we're not going to use. You can +simply use nil to pass a NULL pointer and 0 +for the nfds parameter. Please note that the +number 0 does not convert to a pointer value, +unlike in C++. You really have to pass pointers to pointer arguments +and numbers to number arguments. +

    +

    +The page on FFI semantics has all +of the gory details about +conversions between Lua +objects and C types. For the most part you don't have to deal +with this, as it's performed automatically and it's carefully designed +to bridge the semantic differences between Lua and C. +

    +

    + Now that we have defined +our own sleep() function, we can just call it from plain Lua +code. That wasn't so bad, huh? Turning these boring animated dots into +a fascinating best-selling game is left as an exercise for the reader. +:-) +

    + +

    Accessing the zlib Compression Library

    +

    +The following code shows how to access the zlib compression library from Lua code. +We'll define two convenience wrapper functions that take a string and +compress or uncompress it to another string: +

    +
    +local ffi = require("ffi")
    +ffi.cdef[[ //
    +unsigned long compressBound(unsigned long sourceLen);
    +int compress2(uint8_t *dest, unsigned long *destLen,
    +	      const uint8_t *source, unsigned long sourceLen, int level);
    +int uncompress(uint8_t *dest, unsigned long *destLen,
    +	       const uint8_t *source, unsigned long sourceLen);
    +]]
    +local zlib = ffi.load(ffi.os == "Windows" and "zlib1" or "z") --
    +
    +local function compress(txt)
    +  local n = zlib.compressBound(#txt) --
    +  local buf = ffi.new("uint8_t[?]", n)
    +  local buflen = ffi.new("unsigned long[1]", n) --
    +  local res = zlib.compress2(buf, buflen, txt, #txt, 9)
    +  assert(res == 0)
    +  return ffi.string(buf, buflen[0]) --
    +end
    +
    +local function uncompress(comp, n) --
    +  local buf = ffi.new("uint8_t[?]", n)
    +  local buflen = ffi.new("unsigned long[1]", n)
    +  local res = zlib.uncompress(buf, buflen, comp, #comp)
    +  assert(res == 0)
    +  return ffi.string(buf, buflen[0])
    +end
    +
    +-- Simple test code. --
    +local txt = string.rep("abcd", 1000)
    +print("Uncompressed size: ", #txt)
    +local c = compress(txt)
    +print("Compressed size: ", #c)
    +local txt2 = uncompress(c, #txt)
    +assert(txt2 == txt)
    +
    +

    +Here's the step-by-step explanation: +

    +

    + This defines some of the +C functions provided by zlib. For the sake of this example, some +type indirections have been reduced and it uses the pre-defined +fixed-size integer types, while still adhering to the zlib API/ABI. +

    +

    + This loads the zlib shared +library. On POSIX systems it's named libz.so and usually +comes pre-installed. Since ffi.load() automatically adds any +missing standard prefixes/suffixes, we can simply load the +"z" library. On Windows it's named zlib1.dll and +you'll have to download it first from the +» zlib site. The check for +ffi.os makes sure we pass the right name to +ffi.load(). +

    +

    + First, the maximum size of +the compression buffer is obtained by calling the +zlib.compressBound function with the length of the +uncompressed string. The next line allocates a byte buffer of this +size. The [?] in the type specification indicates a +variable-length array (VLA). The actual number of elements of this +array is given as the 2nd argument to ffi.new(). +

    +

    + This may look strange at +first, but have a look at the declaration of the compress2 +function from zlib: the destination length is defined as a pointer! +This is because you pass in the maximum buffer size and get back the +actual length that was used. +

    +

    +In C you'd pass in the address of a local variable +(&buflen). But since there's no address-of operator in +Lua, we'll just pass in a one-element array. Conveniently it can be +initialized with the maximum buffer size in one step. Calling the +actual zlib.compress2 function is then straightforward. +

    +

    + We want to return the +compressed data as a Lua string, so we'll use ffi.string(). +It needs a pointer to the start of the data and the actual length. The +length has been returned in the buflen array, so we'll just +get it from there. +

    +

    +Note that since the function returns now, the buf and +buflen variables will eventually be garbage collected. This +is fine, because ffi.string() has copied the contents to a +newly created (interned) Lua string. If you plan to call this function +lots of times, consider reusing the buffers and/or handing back the +results in buffers instead of strings. This will reduce the overhead +for garbage collection and string interning. +

    +

    + The uncompress +function does the exact opposite of the compress function. +The compressed data doesn't include the size of the original string, +so this needs to be passed in. Otherwise no surprises here. +

    +

    + The code that makes use +of the functions we just defined is just plain Lua code. It doesn't +need to know anything about the LuaJIT FFI — the convenience +wrapper functions completely hide it. +

    +

    +One major advantage of the LuaJIT FFI is that you are now able to +write those wrappers in Lua. And at a fraction of the time it +would cost you to create an extra C module using the Lua/C API. +Many of the simpler C functions can probably be used directly +from your Lua code, without any wrappers. +

    +

    +Side note: the zlib API uses the long type for passing +lengths and sizes around. But all those zlib functions actually only +deal with 32 bit values. This is an unfortunate choice for a +public API, but may be explained by zlib's history — we'll just +have to deal with it. +

    +

    +First, you should know that a long is a 64 bit type e.g. +on POSIX/x64 systems, but a 32 bit type on Windows/x64 and on +32 bit systems. Thus a long result can be either a plain +Lua number or a boxed 64 bit integer cdata object, depending on +the target system. +

    +

    +Ok, so the ffi.* functions generally accept cdata objects +wherever you'd want to use a number. That's why we get away with +passing n to ffi.string() above. But other Lua +library functions or modules don't know how to deal with this. So for +maximum portability one needs to use tonumber() on returned +long results before passing them on. Otherwise the +application might work on some systems, but would fail in a POSIX/x64 +environment. +

    + +

    Translating C Idioms

    +

    +Here's a list of common C idioms and their translation to the +LuaJIT FFI: +

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    IdiomC codeLua code
    Pointer dereference
    int *p;
    x = *p;
    *p = y;
    x = p[0]
    p[0] = y
    Pointer indexing
    int i, *p;
    x = p[i];
    p[i+1] = y;
    x = p[i]
    p[i+1] = y
    Array indexing
    int i, a[];
    x = a[i];
    a[i+1] = y;
    x = a[i]
    a[i+1] = y
    struct/union dereference
    struct foo s;
    x = s.field;
    s.field = y;
    x = s.field
    s.field = y
    struct/union pointer deref.
    struct foo *sp;
    x = sp->field;
    sp->field = y;
    x = sp.field
    sp.field = y
    Pointer arithmetic
    int i, *p;
    x = p + i;
    y = p - i;
    x = p + i
    y = p - i
    Pointer difference
    int *p1, *p2;
    x = p1 - p2;x = p1 - p2
    Array element pointer
    int i, a[];
    x = &a[i];x = a+i
    Cast pointer to address
    int *p;
    x = (intptr_t)p;x = tonumber(
     ffi.cast("intptr_t",
              p))
    Functions with outargs
    void foo(int *inoutlen);
    int len = x;
    foo(&len);
    y = len;
    local len =
      ffi.new("int[1]", x)
    foo(len)
    y = len[0]
    Vararg conversions
    int printf(char *fmt, ...);
    printf("%g", 1.0);
    printf("%d", 1);
     
    printf("%g", 1);
    printf("%d",
      ffi.new("int", 1))
    + +

    To Cache or Not to Cache

    +

    +It's a common Lua idiom to cache library functions in local variables +or upvalues, e.g.: +

    +
    +local byte, char = string.byte, string.char
    +local function foo(x)
    +  return char(byte(x)+1)
    +end
    +
    +

    +This replaces several hash-table lookups with a (faster) direct use of +a local or an upvalue. This is less important with LuaJIT, since the +JIT compiler optimizes hash-table lookups a lot and is even able to +hoist most of them out of the inner loops. It can't eliminate +all of them, though, and it saves some typing for often-used +functions. So there's still a place for this, even with LuaJIT. +

    +

    +The situation is a bit different with C function calls via the +FFI library. The JIT compiler has special logic to eliminate all +of the lookup overhead for functions resolved from a +C library namespace! +Thus it's not helpful and actually counter-productive to cache +individual C functions like this: +

    +
    +local funca, funcb = ffi.C.funca, ffi.C.funcb -- Not helpful!
    +local function foo(x, n)
    +  for i=1,n do funcb(funca(x, i), 1) end
    +end
    +
    +

    +This turns them into indirect calls and generates bigger and slower +machine code. Instead you'll want to cache the namespace itself and +rely on the JIT compiler to eliminate the lookups: +

    +
    +local C = ffi.C          -- Instead use this!
    +local function foo(x, n)
    +  for i=1,n do C.funcb(C.funca(x, i), 1) end
    +end
    +
    +

    +This generates both shorter and faster code. So don't cache +C functions, but do cache namespaces! Most often the +namespace is already in a local variable at an outer scope, e.g. from +local lib = ffi.load(...). Note that copying +it to a local variable in the function scope is unnecessary. +