diff --git a/doc/changes.html b/doc/changes.html
index 2107193a..04e26e40 100644
--- a/doc/changes.html
+++ b/doc/changes.html
@@ -36,8 +36,6 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
diff --git a/doc/contact.html b/doc/contact.html
index a979cb2f..a85c4884 100644
--- a/doc/contact.html
+++ b/doc/contact.html
@@ -33,8 +33,6 @@
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
index 119c89f5..35e2234e 100644
--- a/doc/ext_c_api.html
+++ b/doc/ext_c_api.html
@@ -33,8 +33,6 @@
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
index 50a44052..1fd276dc 100644
--- a/doc/ext_ffi.html
+++ b/doc/ext_ffi.html
@@ -33,8 +33,6 @@
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
@@ -86,22 +84,30 @@ Please use the FFI sub-topics in the navigation bar to learn more.
 It's really easy to call an external C&nbsp;library function:
 </p>
 <pre class="code">
-<span style="color:#000080;">local ffi = require("ffi")</span>
-ffi.cdef[[
-<span style="color:#00a000;font-weight:bold;">int printf(const char *fmt, ...);</span>
+local ffi = require("ffi") <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9312;</span>
+ffi.cdef[[ <span style="color:#f0f4ff;">//</span><span style="color:#4040c0;">&#9313;</span>
+<span style="color:#00a000;">int printf(const char *fmt, ...);</span>
 ]]
-<span style="color:#c06000;font-weight:bold;">ffi.C</span>.printf("Hello %s!", "world")
+ffi.C.printf("Hello %s!", "world") <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9314;</span>
 </pre>
 <p>
-So, let's pick that apart: the first line (in blue) loads the FFI
-library. The next one adds a C&nbsp;declaration for the function. The
-part between the double-brackets (in green) is just standard
-C&nbsp;syntax. And the last line calls the named C&nbsp;function. Yes,
-it's that simple!
+So, let's pick that apart:
+</p>
+<p>
+<span style="color:#4040c0;">&#9312;</span> Load the FFI library.
+</p>
+<p>
+<span style="color:#4040c0;">&#9313;</span> Add a C&nbsp;declaration
+for the function. The part inside the double-brackets (in green) is
+just standard C&nbsp;syntax.
+</p>
+<p>
+<span style="color:#4040c0;">&#9314;</span> Call the named
+C&nbsp;function &mdash; Yes, it's that simple!
 </p>
 <p style="font-size: 8pt;">
-Actually, what goes on behind the scenes is far from simple: the first
-part of the last line (in orange) makes use of the standard
+Actually, what goes on behind the scenes is far from simple: <span
+style="color:#4040c0;">&#9314;</span> makes use of the standard
 C&nbsp;library namespace <tt>ffi.C</tt>. Indexing this namespace with
 a symbol name (<tt>"printf"</tt>) automatically binds it to the the
 standard C&nbsp;library. The result is a special kind of object which,
@@ -120,7 +126,7 @@ So here's something to pop up a message box on Windows:
 <pre class="code">
 local ffi = require("ffi")
 ffi.cdef[[
-int MessageBoxA(void *w, const char *txt, const char *cap, int type);
+<span style="color:#00a000;">int MessageBoxA(void *w, const char *txt, const char *cap, int type);</span>
 ]]
 ffi.C.MessageBoxA(nil, "Hello world!", "Test", 0)
 </pre>
@@ -193,24 +199,24 @@ And here's the FFI version. The modified parts have been marked in
 bold:
 </p>
 <pre class="code">
-<b>local ffi = require("ffi")
-ffi.cdef[[
-typedef struct { uint8_t red, green, blue, alpha; } rgba_pixel;
+<b>local ffi = require("ffi")</b> <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9312;</span>
+<b>ffi.cdef[[
+</b><span style="color:#00a000;">typedef struct { uint8_t red, green, blue, alpha; } rgba_pixel;</span><b>
 ]]</b>
 
 local function image_ramp_green(n)
-  <b>local img = ffi.new("rgba_pixel[?]", n)</b>
+  <b>local img = ffi.new("rgba_pixel[?]", n)</b> <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9313;</span>
   local f = 255/(n-1)
-  for i=<b>0,n-1</b> do
-    <b>img[i].green = i*f</b>
+  for i=<b>0,n-1</b> do <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9314;</span>
+    <b>img[i].green = i*f</b> <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9315;</span>
     <b>img[i].alpha = 255</b>
   end
   return img
 end
 
 local function image_to_grey(img, n)
-  for i=<b>0,n-1</b> do
-    local y = <b>0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue</b>
+  for i=<b>0,n-1</b> do <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9314;</span>
+    local y = <b>0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue</b> <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9316;</span>
     img[i].red = y; img[i].green = y; img[i].blue = y
   end
 end
@@ -222,25 +228,37 @@ for i=1,1000 do
 end
 </pre>
 <p>
-Ok, so that wasn't too difficult: first, load the FFI library and
-declare the low-level data type. Here we choose a <tt>struct</tt>
-which holds four byte fields, one for each component of a 4x8&nbsp;bit
-RGBA pixel.
+Ok, so that wasn't too difficult:
 </p>
 <p>
-Creating the data structure with <tt>ffi.new()</tt> is straightforward
-&mdash; the <tt>'?'</tt> is a placeholder for the number of elements
-of a variable-length array. C&nbsp;arrays are zero-based, so the
-indexes have to run from <tt>0</tt> to <tt>n-1</tt> (one might
-allocate one more element instead to simplify converting legacy
-code). Since <tt>ffi.new()</tt> zero-fills the array by default, we
-only need to set the green and the alpha fields.
+<span style="color:#4040c0;">&#9312;</span> First, load the FFI
+library and declare the low-level data type. Here we choose a
+<tt>struct</tt> which holds four byte fields, one for each component
+of a 4x8&nbsp;bit RGBA pixel.
 </p>
 <p>
-The calls to <tt>math.floor()</tt> can be omitted here, because
-floating-point numbers are already truncated towards zero when
-converting them to an integer. This happens implicitly when the number
-is stored in the fields of each pixel.
+<span style="color:#4040c0;">&#9313;</span> Creating the data
+structure with <tt>ffi.new()</tt> is straightforward &mdash; the
+<tt>'?'</tt> is a placeholder for the number of elements of a
+variable-length array.
+</p>
+<p>
+<span style="color:#4040c0;">&#9314;</span> C&nbsp;arrays are
+zero-based, so the indexes have to run from <tt>0</tt> to
+<tt>n-1</tt>. One might want to allocate one more element instead to
+simplify converting legacy code.
+</p>
+<p>
+<span style="color:#4040c0;">&#9315;</span> Since <tt>ffi.new()</tt>
+zero-fills the array by default, we only need to set the green and the
+alpha fields.
+</p>
+<p>
+<span style="color:#4040c0;">&#9316;</span> The calls to
+<tt>math.floor()</tt> can be omitted here, because floating-point
+numbers are already truncated towards zero when converting them to an
+integer. This happens implicitly when the number is stored in the
+fields of each pixel.
 </p>
 <p>
 Now let's have a look at the impact of the changes: first, memory
diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
index 7c2e53dd..9bedd52e 100644
--- a/doc/ext_ffi_api.html
+++ b/doc/ext_ffi_api.html
@@ -38,8 +38,6 @@ td.abiparam { font-weight: bold; width: 6em; }
 </li><li>
 <a class="current" href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
@@ -106,7 +104,7 @@ follows:
 </p>
 <pre class="code">
 ffi.cdef[[
-<span style="color:#00a000;font-weight:bold;">typedef struct foo { int a, b; } foo_t;  // Declare a struct and typedef.
+<span style="color:#00a000;">typedef struct foo { int a, b; } foo_t;  // Declare a struct and typedef.
 int dofoo(foo_t *f, int n);  /* Declare an external C function. */</span>
 ]]
 </pre>
@@ -237,12 +235,8 @@ rules</a>.
 </p>
 <p>
 This functions is mainly useful to override the pointer compatibility
-rules or to convert pointers to addresses or vice versa. For maximum
-portability you should convert a pointer to its address as follows:
+checks or to convert pointers to addresses or vice versa.
 </p>
-<pre class="code">
-local addr = tonumber(ffi.cast("intptr_t", ptr))
-</pre>
 
 <h2 id="info">C&nbsp;Type Information</h2>
 <p>
@@ -383,6 +377,45 @@ Contains the target OS name. Same contents as
 Contains the target architecture name. Same contents as
 <a href="ext_jit.html#jit_arch"><tt>jit.arch</tt></a>.
 </p>
+
+<h2 id="extended">Extended Standard Library Functions</h2>
+<p>
+The following standard library functions have been extended to work
+with cdata objects:
+</p>
+
+<h3 id="tonumber"><tt>n = tonumber(cdata)</tt></h3>
+<p>
+Converts a number cdata object to a <tt>double</tt> and returns it as
+a Lua number. This is particularly useful for boxed 64&nbsp;bit
+integer values. Caveat: this conversion may incur a precision loss.
+</p>
+
+<h3 id="tostring"><tt>s = tostring(cdata)</tt></h3>
+<p>
+Returns a string representation of the value of 64&nbsp;bit integers
+(<tt><b>"</b>nnn<b>LL"</b></tt> or <tt><b>"</b>nnn<b>ULL"</b></tt>) or
+complex numbers (<tt><b>"</b>re&plusmn;im<b>i"</b></tt>). Otherwise
+returns a string representation of the C&nbsp;type of a ctype object
+(<tt><b>"ctype&lt;</b>type<b>&gt;"</b></tt>) or a cdata object
+(<tt><b>"cdata&lt;</b>type<b>&gt;:&nbsp;</b>address<b>"</b></tt>).
+</p>
+
+<h2 id="literals">Extensions to the Lua Parser</h2>
+<p>
+The parser for Lua source code treats numeric literals with the
+suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
+integers. Case doesn't matter, but uppercase is recommended for
+readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal
+(<tt>0x2aLL</tt>) literals.
+</p>
+<p>
+The imaginary part of complex numbers can be specified by suffixing
+number literals with <tt>i</tt> or <tt>I</tt>, e.g. <tt>12.5i</tt>.
+Caveat: you'll need to use <tt>1i</tt> to get an imaginary part with
+the value one, since <tt>i</tt> itself still refers to a variable
+named <tt>i</tt>.
+</p>
 <br class="flush">
 </div>
 <div id="foot">
diff --git a/doc/ext_ffi_int64.html b/doc/ext_ffi_int64.html
deleted file mode 100644
index fa155825..00000000
--- a/doc/ext_ffi_int64.html
+++ /dev/null
@@ -1,73 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-<title>64 bit Integers</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Author" content="Mike Pall">
-<meta name="Copyright" content="Copyright (C) 2005-2011, Mike Pall">
-<meta name="Language" content="en">
-<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
-<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
-</head>
-<body>
-<div id="site">
-<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
-</div>
-<div id="head">
-<h1>64 bit Integers</h1>
-</div>
-<div id="nav">
-<ul><li>
-<a href="luajit.html">LuaJIT</a>
-<ul><li>
-<a href="install.html">Installation</a>
-</li><li>
-<a href="running.html">Running</a>
-</li></ul>
-</li><li>
-<a href="extensions.html">Extensions</a>
-<ul><li>
-<a href="ext_ffi.html">FFI Library</a>
-<ul><li>
-<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
-</li><li>
-<a href="ext_ffi_api.html">ffi.* API</a>
-</li><li>
-<a class="current" href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
-<a href="ext_ffi_semantics.html">FFI Semantics</a>
-</li></ul>
-</li><li>
-<a href="ext_jit.html">jit.* Library</a>
-</li><li>
-<a href="ext_c_api.html">Lua/C API</a>
-</li></ul>
-</li><li>
-<a href="status.html">Status</a>
-<ul><li>
-<a href="changes.html">Changes</a>
-</li></ul>
-</li><li>
-<a href="faq.html">FAQ</a>
-</li><li>
-<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
-</li><li>
-<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
-</li></ul>
-</div>
-<div id="main">
-<p>
-TODO
-</p>
-<br class="flush">
-</div>
-<div id="foot">
-<hr class="hide">
-Copyright &copy; 2005-2011 Mike Pall
-<span class="noprint">
-&middot;
-<a href="contact.html">Contact</a>
-</span>
-</div>
-</body>
-</html>
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
index b2b3af30..69dfc2ca 100644
--- a/doc/ext_ffi_semantics.html
+++ b/doc/ext_ffi_semantics.html
@@ -39,8 +39,6 @@ td.convop { font-style: italic; width: 16em; }
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a class="current" href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
@@ -653,7 +651,10 @@ parameters given by the function declaration. Arguments passed to the
 variable argument part of vararg C&nbsp;function use
 <a href="#convert_vararg">special conversion rules</a>. This
 C&nbsp;function is called and the return value (if any) is
-<a href="#convert_tolua">converted to a Lua object</a>.</li>
+<a href="#convert_tolua">converted to a Lua object</a>.<br>
+On Windows/x86 systems, <tt>stdcall</tt> functions are automatically
+detected and a function declared as <tt>cdecl</tt> (the default) is
+silently fixed up after the first call.</li>
 
 </ul>
 
@@ -672,15 +673,24 @@ can be subtracted. The result is the difference between their
 addresses, divided by the element size in bytes. An error is raised if
 the element size is undefined or zero.</li>
 
-<li><a href="ext_ffi_int64.html">64&nbsp;bit integer arithmetic</a>:
-the standard arithmetic operators
-(<tt>+&nbsp;-&nbsp;*&nbsp;/&nbsp;%&nbsp;^</tt> and unary <tt>-</tt>)
-can be applied to two cdata numbers, or a cdata number and a Lua
-number. If one of them is an <tt>uint64_t</tt>, the other side is
+<li><b>64&nbsp;bit integer arithmetic</b>: the standard arithmetic
+operators (<tt>+&nbsp;-&nbsp;*&nbsp;/&nbsp;%&nbsp;^</tt> and unary
+minus) can be applied to two cdata numbers, or a cdata number and a
+Lua number. If one of them is an <tt>uint64_t</tt>, the other side is
 converted to an <tt>uint64_t</tt> and an unsigned arithmetic operation
 is performed. Otherwise both sides are converted to an
 <tt>int64_t</tt> and a signed arithmetic operation is performed. The
-result is a boxed 64&nbsp;bit cdata object.</li>
+result is a boxed 64&nbsp;bit cdata object.<br>
+
+These rules ensure that 64&nbsp;bit integers are "sticky". Any
+expression involving at least one 64&nbsp;bit integer operand results
+in another one. The undefined cases for the division, modulo and power
+operators return <tt>2LL&nbsp;^&nbsp;63</tt> or
+<tt>2ULL&nbsp;^&nbsp;63</tt>.<br>
+
+You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
+number (e.g. for regular floating-point calculations) with
+<tt>tonumber()</tt>. But note this may incur a precision loss.</li>
 
 </ul>
 
@@ -692,12 +702,12 @@ can be compared. The result is the same as an unsigned comparison of
 their addresses. <tt>nil</tt> is treated like a <tt>NULL</tt> pointer,
 which is compatible with any other pointer type.</li>
 
-<li><a href="ext_ffi_int64.html">64&nbsp;bit integer comparison</a>:
-two cdata numbers, or a cdata number and a Lua number can be compared
-with each other. If one of them is an <tt>uint64_t</tt>, the other
-side is converted to an <tt>uint64_t</tt> and an unsigned comparison
-is performed. Otherwise both sides are converted to an
-<tt>int64_t</tt> and a signed comparison is performed.</li>
+<li><b>64&nbsp;bit integer comparison</b>: two cdata numbers, or a
+cdata number and a Lua number can be compared with each other. If one
+of them is an <tt>uint64_t</tt>, the other side is converted to an
+<tt>uint64_t</tt> and an unsigned comparison is performed. Otherwise
+both sides are converted to an <tt>int64_t</tt> and a signed
+comparison is performed.</li>
 
 </ul>
 
diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
index 11e83339..c43b223b 100644
--- a/doc/ext_ffi_tutorial.html
+++ b/doc/ext_ffi_tutorial.html
@@ -8,6 +8,13 @@
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+table.idiomtable { line-height: 1.2; }
+tr.idiomhead td { font-weight: bold; }
+td.idiomc { width: 12em; }
+td.idiomlua { width: 14em; }
+td.idiomlua b { font-weight: normal; color: #2142bf; }
+</style>
 </head>
 <body>
 <div id="site">
@@ -33,8 +40,6 @@
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
@@ -57,7 +62,14 @@
 </div>
 <div id="main">
 <p>
-TODO
+This page is intended to give you an overview of the features of the FFI
+library by presenting a few use cases and guidelines.
+</p>
+<p>
+This page makes no attempt to explain all of the FFI library, though.
+You'll want to have a look at the <a href="ext_ffi_api.html">ffi.* API
+function reference</a> and the <a href="ext_ffi_semantics.html">FFI
+semantics</a> to learn more.
 </p>
 
 <h2 id="load">Loading the FFI Library</h2>
@@ -76,7 +88,339 @@ of globals &mdash; you really need to use the local variable. The
 <tt>require</tt> function ensures the library is only loaded once.
 </p>
 
-<h2>TODO</h2>
+<h2 id="sleep">Accessing Standard System Functions</h2>
+<p>
+The following code explains how to access standard system functions.
+We slowly print two lines of dots by sleeping for 10&nbsp;milliseconds
+after each dot:
+</p>
+<pre class="code">
+local ffi = require("ffi")
+ffi.cdef[[ <span style="color:#f0f4ff;">//</span><span style="color:#4040c0;">&#9312;</span>
+<span style="color:#00a000;">void Sleep(int ms);
+int poll(struct pollfd *fds, unsigned long nfds, int timeout);</span>
+]]
+
+local sleep
+if ffi.os == "Windows" then <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9313;</span>
+  function sleep(s) <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9314;</span>
+    ffi.C.Sleep(s*1000) <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9315;</span>
+  end
+else
+  function sleep(s)
+    ffi.C.poll(nil, 0, s*1000) <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9316;</span>
+  end
+end
+
+for i=1,160 do
+  io.write("."); io.flush()
+  sleep(0.01) <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9317;</span>
+end
+io.write("\n")
+</pre>
+<p>
+Here's the step-by-step explanation:
+</p>
+<p>
+<span style="color:#4040c0;">&#9312;</span> This defines the
+C&nbsp;library functions we're going to use. The part inside the
+double-brackets (in green) is just standard C&nbsp;syntax. You can
+usually get this info from the C&nbsp;header files or the
+documentation provided by each C&nbsp;library or C&nbsp;compiler.
+</p>
+<p>
+<span style="color:#4040c0;">&#9313;</span> The difficulty we're
+facing here is that there are different standards to choose from.
+Windows has a simple <tt>Sleep()</tt> function. On other systems there
+are a variety of functions available to achieve sub-second sleeps, but
+with no clear consensus. Thankfully <tt>poll()</tt> can be used for
+this task, too, and it's present on most non-Windows systems. The
+check for <tt>ffi.os</tt> makes sure we use the Windows-specific
+function only on Windows systems.
+</p>
+<p>
+<span style="color:#4040c0;">&#9314;</span> Here we're wrapping the
+call to the C&nbsp;function in a Lua function. This isn't strictly
+necessary, but it's helpful to deal with system-specific issues only
+in one part of the code. The way we're wrapping it ensures the check
+for the OS is only done during initialization and not for every call.
+</p>
+<p>
+<span style="color:#4040c0;">&#9315;</span> A more subtle point is
+that we defined our <tt>sleep()</tt> function (for the sake of this
+example) as taking the number of seconds, but accepting fractional
+seconds. Multiplying this by 1000 gets us milliseconds, but that still
+leaves it a Lua number, which is a floating-point value. Alas, the
+<tt>Sleep()</tt> function only accepts an integer value. Luckily for
+us, the FFI library automatically performs the conversion when calling
+the function (truncating the FP value towards zero, like in C).
+</p>
+<p style="font-size: 8pt;">
+Some readers will notice that <tt>Sleep()</tt> is part of
+<tt>KERNEL32.DLL</tt> and is also a <tt>stdcall</tt> function. So how
+can this possibly work? The FFI library provides the <tt>ffi.C</tt>
+default C&nbsp;library namespace, which allows calling functions from
+the default set of libraries, like a C&nbsp;compiler would. Also, the
+FFI library automatically detects <tt>stdcall</tt> functions, so you
+don't need to declare them as such.
+</p>
+<p>
+<span style="color:#4040c0;">&#9316;</span> The <tt>poll()</tt>
+function takes a couple more arguments we're not going to use. You can
+simply use <tt>nil</tt> to pass a <tt>NULL</tt> pointer and <tt>0</tt>
+for the <tt>nfds</tt> parameter. Please note that the
+number&nbsp;<tt>0</tt> <em>does not convert to a pointer value</em>,
+unlike in C++. You really have to pass pointers to pointer arguments
+and numbers to number arguments.
+</p>
+<p style="font-size: 8pt;">
+The page on <a href="ext_ffi_semantics.html">FFI semantics</a> has all
+of the gory details about
+<a href="ext_ffi_semantics.html#convert">conversions between Lua
+objects and C&nbsp;types</a>. For the most part you don't have to deal
+with this, as it's performed automatically and it's carefully designed
+to bridge the semantic differences between Lua and C.
+</p>
+<p>
+<span style="color:#4040c0;">&#9317;</span> Now that we have defined
+our own <tt>sleep()</tt> function, we can just call it from plain Lua
+code. That wasn't so bad, huh? Turning these boring animated dots into
+a fascinating best-selling game is left as an exercise for the reader.
+:-)
+</p>
+
+<h2 id="zlib">Accessing the zlib Compression Library</h2>
+<p>
+The following code shows how to access the <a
+href="http://zlib.net/">zlib</a> compression library from Lua code.
+We'll define two convenience wrapper functions that take a string and
+compress or uncompress it to another string:
+</p>
+<pre class="code">
+local ffi = require("ffi")
+ffi.cdef[[ <span style="color:#f0f4ff;">//</span><span style="color:#4040c0;">&#9312;</span>
+<span style="color:#00a000;">unsigned long compressBound(unsigned long sourceLen);
+int compress2(uint8_t *dest, unsigned long *destLen,
+	      const uint8_t *source, unsigned long sourceLen, int level);
+int uncompress(uint8_t *dest, unsigned long *destLen,
+	       const uint8_t *source, unsigned long sourceLen);</span>
+]]
+local zlib = ffi.load(ffi.os == "Windows" and "zlib1" or "z") <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9313;</span>
+
+local function compress(txt)
+  local n = zlib.compressBound(#txt) <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9314;</span>
+  local buf = ffi.new("uint8_t[?]", n)
+  local buflen = ffi.new("unsigned long[1]", n) <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9315;</span>
+  local res = zlib.compress2(buf, buflen, txt, #txt, 9)
+  assert(res == 0)
+  return ffi.string(buf, buflen[0]) <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9316;</span>
+end
+
+local function uncompress(comp, n) <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9317;</span>
+  local buf = ffi.new("uint8_t[?]", n)
+  local buflen = ffi.new("unsigned long[1]", n)
+  local res = zlib.uncompress(buf, buflen, comp, #comp)
+  assert(res == 0)
+  return ffi.string(buf, buflen[0])
+end
+
+-- Simple test code. <span style="color:#f0f4ff;">--</span><span style="color:#4040c0;">&#9318;</span>
+local txt = string.rep("abcd", 1000)
+print("Uncompressed size: ", #txt)
+local c = compress(txt)
+print("Compressed size: ", #c)
+local txt2 = uncompress(c, #txt)
+assert(txt2 == txt)
+</pre>
+<p>
+Here's the step-by-step explanation:
+</p>
+<p>
+<span style="color:#4040c0;">&#9312;</span> This defines some of the
+C&nbsp;functions provided by zlib. For the sake of this example, some
+type indirections have been reduced and it uses the pre-defined
+fixed-size integer types, while still adhering to the zlib API/ABI.
+</p>
+<p>
+<span style="color:#4040c0;">&#9313;</span> This loads the zlib shared
+library. On POSIX systems it's named <tt>libz.so</tt> and usually
+comes pre-installed. Since <tt>ffi.load()</tt> automatically adds any
+missing standard prefixes/suffixes, we can simply load the
+<tt>"z"</tt> library. On Windows it's named <tt>zlib1.dll</tt> and
+you'll have to download it first from the
+<a href="http://zlib.net/"><span class="ext">&raquo;</span>&nbsp;zlib site</a>. The check for
+<tt>ffi.os</tt> makes sure we pass the right name to
+<tt>ffi.load()</tt>.
+</p>
+<p>
+<span style="color:#4040c0;">&#9314;</span> First, the maximum size of
+the compression buffer is obtained by calling the
+<tt>zlib.compressBound</tt> function with the length of the
+uncompressed string. The next line allocates a byte buffer of this
+size. The <tt>[?]</tt> in the type specification indicates a
+variable-length array (VLA). The actual number of elements of this
+array is given as the 2nd argument to <tt>ffi.new()</tt>.
+</p>
+<p>
+<span style="color:#4040c0;">&#9315;</span> This may look strange at
+first, but have a look at the declaration of the <tt>compress2</tt>
+function from zlib: the destination length is defined as a pointer!
+This is because you pass in the maximum buffer size and get back the
+actual length that was used.
+</p>
+<p>
+In C you'd pass in the address of a local variable
+(<tt>&amp;buflen</tt>). But since there's no address-of operator in
+Lua, we'll just pass in a one-element array. Conveniently it can be
+initialized with the maximum buffer size in one step. Calling the
+actual <tt>zlib.compress2</tt> function is then straightforward.
+</p>
+<p>
+<span style="color:#4040c0;">&#9316;</span> We want to return the
+compressed data as a Lua string, so we'll use <tt>ffi.string()</tt>.
+It needs a pointer to the start of the data and the actual length. The
+length has been returned in the <tt>buflen</tt> array, so we'll just
+get it from there.
+</p>
+<p style="font-size: 8pt;">
+Note that since the function returns now, the <tt>buf</tt> and
+<tt>buflen</tt> variables will eventually be garbage collected. This
+is fine, because <tt>ffi.string()</tt> has copied the contents to a
+newly created (interned) Lua string. If you plan to call this function
+lots of times, consider reusing the buffers and/or handing back the
+results in buffers instead of strings. This will reduce the overhead
+for garbage collection and string interning.
+</p>
+<p>
+<span style="color:#4040c0;">&#9317;</span> The <tt>uncompress</tt>
+function does the exact opposite of the <tt>compress</tt> function.
+The compressed data doesn't include the size of the original string,
+so this needs to be passed in. Otherwise no surprises here.
+</p>
+<p>
+<span style="color:#4040c0;">&#9318;</span> The code that makes use
+of the functions we just defined is just plain Lua code. It doesn't
+need to know anything about the LuaJIT FFI &mdash; the convenience
+wrapper functions completely hide it.
+</p>
+<p>
+One major advantage of the LuaJIT FFI is that you are now able to
+write those wrappers <em>in Lua</em>. And at a fraction of the time it
+would cost you to create an extra C&nbsp;module using the Lua/C API.
+Many of the simpler C&nbsp;functions can probably be used directly
+from your Lua code, without any wrappers.
+</p>
+<p style="font-size: 8pt;">
+Side note: the zlib API uses the <tt>long</tt> type for passing
+lengths and sizes around. But all those zlib functions actually only
+deal with 32&nbsp;bit values. This is an unfortunate choice for a
+public API, but may be explained by zlib's history &mdash; we'll just
+have to deal with it.
+</p>
+<p style="font-size: 8pt;">
+First, you should know that a <tt>long</tt> is a 64&nbsp;bit type e.g.
+on POSIX/x64 systems, but a 32&nbsp;bit type on Windows/x64 and on
+32&nbsp;bit systems. Thus a <tt>long</tt> result can be either a plain
+Lua number or a boxed 64&nbsp;bit integer cdata object, depending on
+the target system.
+</p>
+<p style="font-size: 8pt;">
+Ok, so the <tt>ffi.*</tt> functions generally accept cdata objects
+wherever you'd want to use a number. That's why we get away with
+passing <tt>n</tt> to <tt>ffi.string()</tt> above. But other Lua
+library functions or modules don't know how to deal with this. So for
+maximum portability one needs to use <tt>tonumber()</tt> on returned
+<tt>long</tt> results before passing them on. Otherwise the
+application might work on some systems, but would fail in a POSIX/x64
+environment.
+</p>
+
+<h2 id="idioms">Translating C&nbsp;Idioms</h2>
+<p>
+Here's a list of common C&nbsp;idioms and their translation to the
+LuaJIT FFI:
+</p>
+<table class="idiomtable">
+<tr class="idiomhead">
+<td class="idiomdesc">Idiom</td>
+<td class="idiomc">C&nbsp;code</td>
+<td class="idiomlua">Lua code</td>
+</tr>
+<tr class="odd separate">
+<td class="idiomdesc">Pointer dereference<br><tt>int *p;</tt></td><td class="idiomc"><tt>x = *p;<br>*p = y;</tt></td><td class="idiomlua"><tt>x = <b>p[0]</b><br><b>p[0]</b> = y</tt></td></tr>
+<tr class="even">
+<td class="idiomdesc">Pointer indexing<br><tt>int i, *p;</tt></td><td class="idiomc"><tt>x = p[i];<br>p[i+1] = y;</tt></td><td class="idiomlua"><tt>x = p[i]<br>p[i+1] = y</tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc">Array indexing<br><tt>int i, a[];</tt></td><td class="idiomc"><tt>x = a[i];<br>a[i+1] = y;</tt></td><td class="idiomlua"><tt>x = a[i]<br>a[i+1] = y</tt></td></tr>
+<tr class="even separate">
+<td class="idiomdesc"><tt>struct</tt>/<tt>union</tt> dereference<br><tt>struct foo s;</tt></td><td class="idiomc"><tt>x = s.field;<br>s.field = y;</tt></td><td class="idiomlua"><tt>x = s.field<br>s.field = y</tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc"><tt>struct</tt>/<tt>union</tt> pointer deref.<br><tt>struct foo *sp;</tt></td><td class="idiomc"><tt>x = sp->field;<br>sp->field = y;</tt></td><td class="idiomlua"><tt>x = <b>sp.field</b><br><b>sp.field</b> = y</tt></td></tr>
+<tr class="even separate">
+<td class="idiomdesc">Pointer arithmetic<br><tt>int i, *p;</tt></td><td class="idiomc"><tt>x = p + i;<br>y = p - i;</tt></td><td class="idiomlua"><tt>x = p + i<br>y = p - i</tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc">Pointer difference<br><tt>int *p1, *p2;</tt></td><td class="idiomc"><tt>x = p1 - p2;</tt></td><td class="idiomlua"><tt>x = p1 - p2</tt></td></tr>
+<tr class="even">
+<td class="idiomdesc">Array element pointer<br><tt>int i, a[];</tt></td><td class="idiomc"><tt>x = &amp;a[i];</tt></td><td class="idiomlua"><tt>x = <b>a+i</b></tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc">Cast pointer to address<br><tt>int *p;</tt></td><td class="idiomc"><tt>x = (intptr_t)p;</tt></td><td class="idiomlua"><tt>x = <b>tonumber(<br>&nbsp;ffi.cast("intptr_t",<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;p))</b></tt></td></tr>
+<tr class="even separate">
+<td class="idiomdesc">Functions with outargs<br><tt>void foo(int *inoutlen);</tt></td><td class="idiomc"><tt>int len = x;<br>foo(&amp;len);<br>y = len;</tt></td><td class="idiomlua"><tt><b>local len =<br>&nbsp;&nbsp;ffi.new("int[1]", x)<br>foo(len)<br>y = len[0]</b></tt></td></tr>
+<tr class="odd">
+<td class="idiomdesc"><a href="ext_ffi_semantics.html#convert_vararg">Vararg conversions</a><br><tt>int printf(char *fmt, ...);</tt></td><td class="idiomc"><tt>printf("%g", 1.0);<br>printf("%d", 1);<br>&nbsp;</tt></td><td class="idiomlua"><tt>printf("%g", 1);<br>printf("%d",<br>&nbsp;&nbsp;<b>ffi.new("int", 1)</b>)</tt></td></tr>
+</table>
+
+<h2 id="cache">To Cache or Not to Cache</h2>
+<p>
+It's a common Lua idiom to cache library functions in local variables
+or upvalues, e.g.:
+</p>
+<pre class="code">
+local byte, char = string.byte, string.char
+local function foo(x)
+  return char(byte(x)+1)
+end
+</pre>
+<p>
+This replaces several hash-table lookups with a (faster) direct use of
+a local or an upvalue. This is less important with LuaJIT, since the
+JIT compiler optimizes hash-table lookups a lot and is even able to
+hoist most of them out of the inner loops. It can't eliminate
+<em>all</em> of them, though, and it saves some typing for often-used
+functions. So there's still a place for this, even with LuaJIT.
+</p>
+<p>
+The situation is a bit different with C&nbsp;function calls via the
+FFI library. The JIT compiler has special logic to eliminate <em>all
+of the lookup overhead</em> for functions resolved from a
+<a href="ext_ffi_semantics.html#clib">C&nbsp;library namespace</a>!
+Thus it's not helpful and actually counter-productive to cache
+individual C&nbsp;functions like this:
+</p>
+<pre class="code">
+local <b>funca</b>, <b>funcb</b> = ffi.C.funca, ffi.C.funcb -- <span style="color:#c00000;">Not helpful!</span>
+local function foo(x, n)
+  for i=1,n do <b>funcb</b>(<b>funca</b>(x, i), 1) end
+end
+</pre>
+<p>
+This turns them into indirect calls and generates bigger and slower
+machine code. Instead you'll want to cache the namespace itself and
+rely on the JIT compiler to eliminate the lookups:
+</p>
+<pre class="code">
+local <b>C</b> = ffi.C          -- <span style="color:#00a000;">Instead use this!</span>
+local function foo(x, n)
+  for i=1,n do <b>C.funcb</b>(<b>C.funca</b>(x, i), 1) end
+end
+</pre>
+<p>
+This generates both shorter and faster code. So <b>don't cache
+C&nbsp;functions</b>, but <b>do</b> cache namespaces! Most often the
+namespace is already in a local variable at an outer scope, e.g. from
+<tt>local&nbsp;lib&nbsp;=&nbsp;ffi.load(...)</tt>. Note that copying
+it to a local variable in the function scope is unnecessary.
+</p>
 <br class="flush">
 </div>
 <div id="foot">
diff --git a/doc/ext_jit.html b/doc/ext_jit.html
index 36e306a8..e8f5518e 100644
--- a/doc/ext_jit.html
+++ b/doc/ext_jit.html
@@ -33,8 +33,6 @@
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
diff --git a/doc/extensions.html b/doc/extensions.html
index 8385727b..7d12299c 100644
--- a/doc/extensions.html
+++ b/doc/extensions.html
@@ -50,8 +50,6 @@ td.excinterop {
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
diff --git a/doc/faq.html b/doc/faq.html
index 4ec69882..8de8c3f1 100644
--- a/doc/faq.html
+++ b/doc/faq.html
@@ -36,8 +36,6 @@ dd { margin-left: 1.5em; }
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
diff --git a/doc/install.html b/doc/install.html
index 55de1bd8..220c326f 100644
--- a/doc/install.html
+++ b/doc/install.html
@@ -62,8 +62,6 @@ td.compatno {
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
diff --git a/doc/luajit.html b/doc/luajit.html
index 131e4396..108ca18e 100644
--- a/doc/luajit.html
+++ b/doc/luajit.html
@@ -34,8 +34,6 @@
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
diff --git a/doc/running.html b/doc/running.html
index cb96eabb..ba55e56d 100644
--- a/doc/running.html
+++ b/doc/running.html
@@ -55,8 +55,6 @@ td.param_default {
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
diff --git a/doc/status.html b/doc/status.html
index 5ce3bf4c..eddbfb24 100644
--- a/doc/status.html
+++ b/doc/status.html
@@ -36,8 +36,6 @@ ul li { padding-bottom: 0.3em; }
 </li><li>
 <a href="ext_ffi_api.html">ffi.* API</a>
 </li><li>
-<a href="ext_ffi_int64.html">64 bit Integers</a>
-</li><li>
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>

Idiom	C code	Lua code
Pointer dereference `int *p;`	`x = *p; *p = y;`	`x = p[0] p[0] = y`
Pointer indexing `int i, *p;`	`x = p[i]; p[i+1] = y;`	`x = p[i] p[i+1] = y`
Array indexing `int i, a[];`	`x = a[i]; a[i+1] = y;`	`x = a[i] a[i+1] = y`
`struct`/`union` dereference `struct foo s;`	`x = s.field; s.field = y;`	`x = s.field s.field = y`
`struct`/`union` pointer deref. `struct foo *sp;`	`x = sp->field; sp->field = y;`	`x = sp.field sp.field = y`
Pointer arithmetic `int i, *p;`	`x = p + i; y = p - i;`	`x = p + i y = p - i`
Pointer difference `int *p1, *p2;`	`x = p1 - p2;`	`x = p1 - p2`
Array element pointer `int i, a[];`	`x = &a[i];`	`x = a+i`
Cast pointer to address `int *p;`	`x = (intptr_t)p;`	`x = tonumber(ffi.cast("intptr_t", p))`
Functions with outargs `void foo(int *inoutlen);`	`int len = x; foo(&len); y = len;`	`local len = ffi.new("int[1]", x) foo(len) y = len[0]`
Vararg conversions `int printf(char *fmt, ...);`	`printf("%g", 1.0); printf("%d", 1);`	`printf("%g", 1); printf("%d", ffi.new("int", 1))`