From d48555b36ac512161b81f9b6bca7bea16a0cd806 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter.wuille@gmail.com>
Date: Tue, 18 Nov 2014 18:06:36 +0100
Subject: [PATCH] Squashed 'src/secp256k1/' content from commit ad2028f

git-subtree-dir: src/secp256k1
git-subtree-split: ad2028f9890ca40bdd32055aa0fe5c1c9af0e485
---
 .gitignore                                |   35 +
 .travis.yml                               |   25 +
 COPYING                                   |   19 +
 Makefile.am                               |   88 ++
 README.md                                 |   55 ++
 TODO                                      |    3 +
 autogen.sh                                |    3 +
 build-aux/m4/bitcoin_secp.m4              |   90 ++
 configure.ac                              |  259 +++++
 include/secp256k1.h                       |  252 +++++
 libsecp256k1.pc.in                        |   13 +
 nasm_lt.sh                                |   57 ++
 obj/.gitignore                            |    0
 src/bench_inv.c                           |   41 +
 src/bench_sign.c                          |   49 +
 src/bench_verify.c                        |   44 +
 src/ecdsa.h                               |   23 +
 src/ecdsa_impl.h                          |  183 ++++
 src/eckey.h                               |   25 +
 src/eckey_impl.h                          |  200 ++++
 src/ecmult.h                              |   19 +
 src/ecmult_gen.h                          |   19 +
 src/ecmult_gen_impl.h                     |  118 +++
 src/ecmult_impl.h                         |  222 +++++
 src/field.h                               |  114 +++
 src/field_10x26.h                         |   21 +
 src/field_10x26_impl.h                    |  884 +++++++++++++++++
 src/field_5x52.h                          |   21 +
 src/field_5x52_asm.asm                    |  469 +++++++++
 src/field_5x52_asm_impl.h                 |   13 +
 src/field_5x52_impl.h                     |  260 +++++
 src/field_5x52_int128_impl.h              |  279 ++++++
 src/field_gmp.h                           |   18 +
 src/field_gmp_impl.h                      |  163 ++++
 src/field_impl.h                          |  293 ++++++
 src/group.h                               |  128 +++
 src/group_impl.h                          |  519 ++++++++++
 src/java/org/bitcoin/NativeSecp256k1.java |   60 ++
 src/java/org_bitcoin_NativeSecp256k1.c    |   23 +
 src/java/org_bitcoin_NativeSecp256k1.h    |   21 +
 src/num.h                                 |  100 ++
 src/num_gmp.h                             |   20 +
 src/num_gmp_impl.h                        |  376 +++++++
 src/num_impl.h                            |   22 +
 src/scalar.h                              |   63 ++
 src/scalar_4x64.h                         |   17 +
 src/scalar_4x64_impl.h                    |  359 +++++++
 src/scalar_8x32.h                         |   17 +
 src/scalar_8x32_impl.h                    |  572 +++++++++++
 src/scalar_impl.h                         |  184 ++++
 src/secp256k1.c                           |  305 ++++++
 src/testrand.h                            |   26 +
 src/testrand_impl.h                       |   60 ++
 src/tests.c                               | 1080 +++++++++++++++++++++
 src/util.h                                |   64 ++
 55 files changed, 8393 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .travis.yml
 create mode 100644 COPYING
 create mode 100644 Makefile.am
 create mode 100644 README.md
 create mode 100644 TODO
 create mode 100755 autogen.sh
 create mode 100644 build-aux/m4/bitcoin_secp.m4
 create mode 100644 configure.ac
 create mode 100644 include/secp256k1.h
 create mode 100644 libsecp256k1.pc.in
 create mode 100755 nasm_lt.sh
 create mode 100644 obj/.gitignore
 create mode 100644 src/bench_inv.c
 create mode 100644 src/bench_sign.c
 create mode 100644 src/bench_verify.c
 create mode 100644 src/ecdsa.h
 create mode 100644 src/ecdsa_impl.h
 create mode 100644 src/eckey.h
 create mode 100644 src/eckey_impl.h
 create mode 100644 src/ecmult.h
 create mode 100644 src/ecmult_gen.h
 create mode 100644 src/ecmult_gen_impl.h
 create mode 100644 src/ecmult_impl.h
 create mode 100644 src/field.h
 create mode 100644 src/field_10x26.h
 create mode 100644 src/field_10x26_impl.h
 create mode 100644 src/field_5x52.h
 create mode 100644 src/field_5x52_asm.asm
 create mode 100644 src/field_5x52_asm_impl.h
 create mode 100644 src/field_5x52_impl.h
 create mode 100644 src/field_5x52_int128_impl.h
 create mode 100644 src/field_gmp.h
 create mode 100644 src/field_gmp_impl.h
 create mode 100644 src/field_impl.h
 create mode 100644 src/group.h
 create mode 100644 src/group_impl.h
 create mode 100644 src/java/org/bitcoin/NativeSecp256k1.java
 create mode 100644 src/java/org_bitcoin_NativeSecp256k1.c
 create mode 100644 src/java/org_bitcoin_NativeSecp256k1.h
 create mode 100644 src/num.h
 create mode 100644 src/num_gmp.h
 create mode 100644 src/num_gmp_impl.h
 create mode 100644 src/num_impl.h
 create mode 100644 src/scalar.h
 create mode 100644 src/scalar_4x64.h
 create mode 100644 src/scalar_4x64_impl.h
 create mode 100644 src/scalar_8x32.h
 create mode 100644 src/scalar_8x32_impl.h
 create mode 100644 src/scalar_impl.h
 create mode 100644 src/secp256k1.c
 create mode 100644 src/testrand.h
 create mode 100644 src/testrand_impl.h
 create mode 100644 src/tests.c
 create mode 100644 src/util.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..f0a54077a5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,35 @@
+bench_inv
+bench_sign
+bench_verify
+tests
+*.exe
+*.so
+*.a
+!.gitignore
+
+Makefile
+configure
+.libs/
+Makefile.in
+aclocal.m4
+autom4te.cache/
+config.log
+config.status
+*.tar.gz
+*.la
+libtool
+.deps/
+.dirstamp
+build-aux/
+*.lo
+*.o
+*~
+src/libsecp256k1-config.h
+src/libsecp256k1-config.h.in
+m4/libtool.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+m4/lt~obsolete.m4
+src/stamp-h1
+libsecp256k1.pc
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000000..24a86b561b
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,25 @@
+language: cpp
+compiler: gcc
+install:
+  - sudo apt-get install -qq libssl-dev
+  - if [ "$BIGNUM" = "gmp" -o "$BIGNUM" = "auto" -o "$FIELD" = "gmp" ]; then sudo apt-get install -qq libgmp-dev; fi
+  - if [ "$FIELD" = "64bit_asm" ]; then sudo apt-get install -qq yasm; fi
+env:
+  global:
+    - FIELD=auto  BIGNUM=auto  SCALAR=auto  ENDOMORPHISM=no  BUILD=check  EXTRAFLAGS=
+  matrix:
+    - SCALAR=32bit
+    - SCALAR=64bit
+    - FIELD=gmp
+    - FIELD=gmp       ENDOMORPHISM=yes
+    - FIELD=64bit_asm
+    - FIELD=64bit_asm ENDOMORPHISM=yes
+    - FIELD=64bit
+    - FIELD=64bit     ENDOMORPHISM=yes
+    - FIELD=32bit
+    - FIELD=32bit     ENDOMORPHISM=yes
+    - BUILD=distcheck
+    - EXTRAFLAGS=CFLAGS=-DDETERMINISTIC
+before_script: ./autogen.sh
+script: ./configure --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR $EXTRAFLAGS && make -j2 $BUILD
+os: linux
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000000..4522a5990e
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,19 @@
+Copyright (c) 2013 Pieter Wuille
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000000..d527da6b77
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,88 @@
+ACLOCAL_AMFLAGS = -I build-aux/m4
+
+lib_LTLIBRARIES = libsecp256k1.la
+if USE_ASM
+COMMON_LIB = libsecp256k1_common.la
+else
+COMMON_LIB =
+endif
+noinst_LTLIBRARIES = $(COMMON_LIB)
+include_HEADERS = include/secp256k1.h
+noinst_HEADERS =
+noinst_HEADERS += src/scalar.h
+noinst_HEADERS += src/scalar_4x64.h
+noinst_HEADERS += src/scalar_8x32.h
+noinst_HEADERS += src/scalar_impl.h
+noinst_HEADERS += src/scalar_4x64_impl.h
+noinst_HEADERS += src/scalar_8x32_impl.h
+noinst_HEADERS += src/group.h
+noinst_HEADERS += src/group_impl.h
+noinst_HEADERS += src/num_gmp.h
+noinst_HEADERS += src/num_gmp_impl.h
+noinst_HEADERS += src/ecdsa.h
+noinst_HEADERS += src/ecdsa_impl.h
+noinst_HEADERS += src/eckey.h
+noinst_HEADERS += src/eckey_impl.h
+noinst_HEADERS += src/ecmult.h
+noinst_HEADERS += src/ecmult_impl.h
+noinst_HEADERS += src/ecmult_gen.h
+noinst_HEADERS += src/ecmult_gen_impl.h
+noinst_HEADERS += src/num.h
+noinst_HEADERS += src/num_impl.h
+noinst_HEADERS += src/field_10x26.h
+noinst_HEADERS += src/field_10x26_impl.h
+noinst_HEADERS += src/field_5x52.h
+noinst_HEADERS += src/field_5x52_impl.h
+noinst_HEADERS += src/field_5x52_int128_impl.h
+noinst_HEADERS += src/field_5x52_asm_impl.h
+noinst_HEADERS += src/java/org_bitcoin_NativeSecp256k1.h
+noinst_HEADERS += src/util.h
+noinst_HEADERS += src/testrand.h
+noinst_HEADERS += src/testrand_impl.h
+noinst_HEADERS += src/field_gmp.h
+noinst_HEADERS += src/field_gmp_impl.h
+noinst_HEADERS += src/field.h
+noinst_HEADERS += src/field_impl.h
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libsecp256k1.pc
+
+if USE_ASM
+libsecp256k1_common_la_SOURCES = src/field_5x52_asm.asm
+endif
+
+libsecp256k1_la_SOURCES = src/secp256k1.c
+libsecp256k1_la_CPPFLAGS = -I$(top_srcdir)/include $(SECP_INCLUDES)
+libsecp256k1_la_LIBADD = $(COMMON_LIB) $(SECP_LIBS)
+
+
+noinst_PROGRAMS =
+if USE_BENCHMARK
+noinst_PROGRAMS += bench_verify bench_sign bench_inv
+bench_verify_SOURCES = src/bench_verify.c
+bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS)
+bench_verify_LDFLAGS = -static
+bench_sign_SOURCES = src/bench_sign.c
+bench_sign_LDADD = libsecp256k1.la $(SECP_LIBS)
+bench_sign_LDFLAGS = -static
+bench_inv_SOURCES = src/bench_inv.c
+bench_inv_LDADD = $(COMMON_LIB) $(SECP_LIBS)
+bench_inv_LDFLAGS = -static
+endif
+
+if USE_TESTS
+noinst_PROGRAMS += tests
+tests_SOURCES = src/tests.c
+tests_CPPFLAGS = -DVERIFY $(SECP_TEST_INCLUDES)
+tests_LDADD = $(COMMON_LIB) $(SECP_LIBS) $(SECP_TEST_LIBS)
+tests_LDFLAGS = -static
+TESTS = tests
+endif
+
+EXTRA_DIST = autogen.sh nasm_lt.sh
+
+#x86_64 only
+if USE_ASM
+.asm.lo:
+	$(LIBTOOL) --mode=compile --tag YASM $(srcdir)/nasm_lt.sh $(YASM) -f $(YASM_BINFMT) $(YAFLAGS) -I$(srcdir) -I. $< -o $@
+endif
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..1e49f49416
--- /dev/null
+++ b/README.md
@@ -0,0 +1,55 @@
+libsecp256k1
+============
+
+[![Build Status](https://travis-ci.org/bitcoin/secp256k1.svg?branch=master)](https://travis-ci.org/bitcoin/secp256k1)
+
+Optimized C library for EC operations on curve secp256k1.
+
+This library is experimental, so use at your own risk.
+
+Features:
+* Low-level field and group operations on secp256k1.
+* ECDSA signing/verification and key generation.
+* Adding/multiplying private/public keys.
+* Serialization/parsing of private keys, public keys, signatures.
+* Very efficient implementation.
+
+Implementation details
+----------------------
+
+* General
+  * Avoid dynamic memory usage almost everywhere.
+* Field operations
+  * Optimized implementation of arithmetic modulo the curve's field size (2^256 - 0x1000003D1).
+    * Using 5 52-bit limbs (including hand-optimized assembly for x86_64, by Diederik Huys).
+    * Using 10 26-bit limbs.
+    * Using GMP.
+  * Field inverses and square roots using a sliding window over blocks of 1s (by Peter Dettman).
+* Scalar operations
+  * Optimized implementation without data-dependent branches of arithmetic modulo the curve's order.
+    * Using 4 64-bit limbs (relying on __int128 support in the compiler).
+    * Using 8 32-bit limbs.
+* Group operations
+  * Point addition formula specifically simplified for the curve equation (y^2 = x^3 + 7).
+  * Use addition between points in Jacobian and affine coordinates where possible.
+  * Use a unified addition/doubling formula where necessary to avoid data-dependent branches.
+* Point multiplication for verification (a*P + b*G).
+  * Use wNAF notation for point multiplicands.
+  * Use a much larger window for multiples of G, using precomputed multiples.
+  * Use Shamir's trick to do the multiplication with the public key and the generator simultaneously.
+  * Optionally use secp256k1's efficiently-computable endomorphism to split the multiplicands into 4 half-sized ones first.
+* Point multiplication for signing
+  * Use a precomputed table of multiples of powers of 16 multiplied with the generator, so general multiplication becomes a series of additions.
+  * Slice the precomputed table in memory per byte, so memory access to the table becomes uniform.
+  * No data-dependent branches
+  * The precomputed tables add and eventually subtract points for which no corresponding scalar (private key) is known, preventing even an attacker who controls the private key in use from controlling the data internally.
+
+Build steps
+-----------
+
+libsecp256k1 is built using autotools:
+
+    $ ./autogen.sh
+    $ ./configure
+    $ make
+    $ sudo make install  # optional
diff --git a/TODO b/TODO
new file mode 100644
index 0000000000..a300e1c5eb
--- /dev/null
+++ b/TODO
@@ -0,0 +1,3 @@
+* Unit tests for fieldelem/groupelem, including ones intended to
+  trigger fieldelem's boundary cases.
+* Complete constant-time operations for signing/keygen
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000000..65286b9353
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+set -e
+autoreconf -if --warnings=all
diff --git a/build-aux/m4/bitcoin_secp.m4 b/build-aux/m4/bitcoin_secp.m4
new file mode 100644
index 0000000000..e6f3470ed7
--- /dev/null
+++ b/build-aux/m4/bitcoin_secp.m4
@@ -0,0 +1,90 @@
+dnl libsecp256k1 helper checks
+AC_DEFUN([SECP_INT128_CHECK],[
+has_int128=$ac_cv_type___int128  # presumably the cached outcome of the __int128 type probe made in configure.ac
+if test x"$has_int128" != x"yes" && test x"$set_field" = x"64bit"; then
+  AC_MSG_ERROR([$set_field field support explicitly requested but is not compatible with this host])
+fi
+if test x"$has_int128" != x"yes" && test x"$set_scalar" = x"64bit"; then
+  AC_MSG_ERROR([$set_scalar scalar support explicitly requested but is not compatible with this host])
+fi
+])
+
+dnl Detect yasm on x86_64 and choose YASM_BINFMT; sets has_64bit_asm (errors only if 64bit_asm was explicitly requested).
+AC_DEFUN([SECP_64BIT_ASM_CHECK],[
+if test x"$host_cpu" = x"x86_64"; then
+  AC_CHECK_PROG(YASM, yasm, yasm)
+else
+  if test x"$set_field" = x"64bit_asm"; then
+    AC_MSG_ERROR([$set_field field support explicitly requested but is not compatible with this host])
+  fi
+fi
+if test x"$YASM" = x; then
+  if test x"$set_field" = x"64bit_asm"; then
+    AC_MSG_ERROR([$set_field field support explicitly requested but yasm was not found])
+  fi
+  has_64bit_asm=no
+else
+  case x"$host_os" in
+  xdarwin*)
+    YASM_BINFMT=macho64
+    ;;
+  x*-gnux32)
+    YASM_BINFMT=elfx32
+    ;;
+  *)
+    YASM_BINFMT=elf64
+    ;;
+  esac
+  if $YASM -f help | grep -q $YASM_BINFMT; then
+    has_64bit_asm=yes
+  else
+    if test x"$set_field" = x"64bit_asm"; then
+      AC_MSG_ERROR([$set_field field support explicitly requested but yasm doesn't support $YASM_BINFMT format])
+    fi
+    AC_MSG_WARN([yasm too old for $YASM_BINFMT format])
+    has_64bit_asm=no
+  fi
+fi
+])
+
+dnl Locate libcrypto (pkg-config or header/library probe) and test whether it offers secp256k1 EC functions; sets has_libcrypto and has_openssl_ec.
+AC_DEFUN([SECP_OPENSSL_CHECK],[
+if test x"$use_pkgconfig" = x"yes"; then
+    : #NOP
+  m4_ifdef([PKG_CHECK_MODULES],[
+    PKG_CHECK_MODULES([CRYPTO], [libcrypto], [has_libcrypto=yes; AC_DEFINE(HAVE_LIBCRYPTO,1,[Define this symbol if libcrypto is installed])],[has_libcrypto=no])
+    : #NOP
+  ])
+else
+  AC_CHECK_HEADER(openssl/crypto.h,[AC_CHECK_LIB(crypto, main,[has_libcrypto=yes; CRYPTO_LIBS=-lcrypto; AC_DEFINE(HAVE_LIBCRYPTO,1,[Define this symbol if libcrypto is installed])]
+)])
+  LIBS=
+fi
+if test x"$has_libcrypto" = x"yes" && test x"$has_openssl_ec" = x; then
+  AC_MSG_CHECKING(for EC functions in libcrypto)
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+    #include <openssl/ec.h>
+    #include <openssl/ecdsa.h>
+    #include <openssl/obj_mac.h>]],[[
+    EC_KEY *eckey = EC_KEY_new_by_curve_name(NID_secp256k1);
+    ECDSA_sign(0, NULL, 0, NULL, NULL, eckey);
+    ECDSA_verify(0, NULL, 0, NULL, 0, eckey);
+    EC_KEY_free(eckey);
+  ]])],[has_openssl_ec=yes],[has_openssl_ec=no])
+  AC_MSG_RESULT([$has_openssl_ec])
+fi
+])
+
+dnl Probe for gmp.h and libgmp unless has_gmp is already yes; sets has_gmp and GMP_LIBS, and errors if gmp was explicitly requested but is missing.
+AC_DEFUN([SECP_GMP_CHECK],[
+if test x"$has_gmp" != x"yes"; then
+  AC_CHECK_HEADER(gmp.h,[AC_CHECK_LIB(gmp, __gmpz_init,[has_gmp=yes; GMP_LIBS=-lgmp; AC_DEFINE(HAVE_LIBGMP,1,[Define this symbol if libgmp is installed])])])
+fi
+if test x"$set_field" = x"gmp" && test x"$has_gmp" != x"yes"; then
+    AC_MSG_ERROR([$set_field field support explicitly requested but libgmp was not found])
+fi
+if test x"$set_bignum" = x"gmp" && test x"$has_gmp" != x"yes"; then
+    AC_MSG_ERROR([$set_bignum bignum support explicitly requested but libgmp was not found])
+fi
+])
+
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000000..2da5709834
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,259 @@
+AC_PREREQ([2.60])
+AC_INIT([libsecp256k1],[0.1])
+AC_CONFIG_AUX_DIR([build-aux])
+AC_CONFIG_MACRO_DIR([build-aux/m4])
+AC_CANONICAL_HOST
+AH_TOP([#ifndef LIBSECP256K1_CONFIG_H])
+AH_TOP([#define LIBSECP256K1_CONFIG_H])
+AH_BOTTOM([#endif //LIBSECP256K1_CONFIG_H])
+AM_INIT_AUTOMAKE([foreign])
+LT_INIT
+
+dnl make the compilation flags quiet unless V=1 is used
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+
+PKG_PROG_PKG_CONFIG
+
+AC_PATH_TOOL(AR, ar)
+AC_PATH_TOOL(RANLIB, ranlib)
+AC_PATH_TOOL(STRIP, strip)
+
+AC_PROG_CC_C99
+if test x"$ac_cv_prog_cc_c99" = x"no"; then
+  AC_MSG_ERROR([c99 compiler support required])
+fi
+
+case $host in
+  *mingw*)
+     use_pkgconfig=no
+     ;;
+   *)
+     use_pkgconfig=yes
+     ;;
+esac
+
+case $host_os in
+  darwin*)
+     CPPFLAGS="$CPPFLAGS -I/opt/local/include"
+     LDFLAGS="$LDFLAGS -L/opt/local/lib"
+     ;;
+esac
+
+CFLAGS="$CFLAGS -W"
+
+warn_CFLAGS="-Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes -Wno-unused-function"
+saved_CFLAGS="$CFLAGS"
+CFLAGS="$CFLAGS $warn_CFLAGS"
+AC_MSG_CHECKING([if ${CC} supports ${warn_CFLAGS}])
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[char foo;]])],
+    [ AC_MSG_RESULT([yes]) ],
+    [ AC_MSG_RESULT([no])
+      CFLAGS="$saved_CFLAGS"
+    ])
+
+
+AC_ARG_ENABLE(benchmark,
+    AS_HELP_STRING([--enable-benchmark],[compile benchmark (default is yes)]),
+    [use_benchmark=$enableval],
+    [use_benchmark=yes])
+
+AC_ARG_ENABLE(tests,
+    AS_HELP_STRING([--enable-tests],[compile tests (default is yes)]),
+    [use_tests=$enableval],
+    [use_tests=yes])
+
+AC_ARG_ENABLE(endomorphism,
+    AS_HELP_STRING([--enable-endomorphism],[enable endomorphism (default is no)]),
+    [use_endomorphism=$enableval],
+    [use_endomorphism=no])
+
+AC_ARG_WITH([field], [AS_HELP_STRING([--with-field=gmp|64bit|64bit_asm|32bit|auto],
+[Specify Field Implementation. Default is auto])],[req_field=$withval], [req_field=auto])
+
+AC_ARG_WITH([bignum], [AS_HELP_STRING([--with-bignum=gmp|auto],
+[Specify Bignum Implementation. Default is auto])],[req_bignum=$withval], [req_bignum=auto])
+
+AC_ARG_WITH([scalar], [AS_HELP_STRING([--with-scalar=64bit|32bit|auto],
+[Specify scalar implementation. Default is auto])],[req_scalar=$withval], [req_scalar=auto])
+
+AC_CHECK_TYPES([__int128])
+
+AC_CHECK_DECL(__builtin_expect,AC_DEFINE(HAVE_BUILTIN_EXPECT,1,[Define this symbol if __builtin_expect is available]),,)
+
+if test x"$req_field" = x"auto"; then
+  SECP_64BIT_ASM_CHECK
+  if test x"$has_64bit_asm" = x"yes"; then
+    set_field=64bit_asm
+  fi
+
+  if test x"$set_field" = x; then
+    SECP_INT128_CHECK
+    if test x"$has_int128" = x"yes"; then
+      set_field=64bit
+    fi
+  fi
+
+  if test x"$set_field" = x; then
+    SECP_GMP_CHECK
+    if test x"$has_gmp" = x"yes"; then
+      set_field=gmp
+    fi
+  fi
+
+  if test x"$set_field" = x; then
+    set_field=32bit
+  fi
+else
+  set_field=$req_field
+  case $set_field in
+  64bit_asm)
+    SECP_64BIT_ASM_CHECK
+    ;;
+  64bit)
+    SECP_INT128_CHECK
+    ;;
+  gmp)
+    SECP_GMP_CHECK
+    ;;
+  32bit)
+    ;;
+  *)
+    AC_MSG_ERROR([invalid field implementation selection])
+    ;;
+  esac
+fi
+
+if test x"$req_scalar" = x"auto"; then
+  if test x"$set_scalar" = x; then
+    SECP_INT128_CHECK
+    if test x"$has_int128" = x"yes"; then
+      set_scalar=64bit
+    fi
+  fi
+  if test x"$set_scalar" = x; then
+    set_scalar=32bit
+  fi
+else
+  set_scalar=$req_scalar
+  case $set_scalar in
+  64bit)
+    SECP_INT128_CHECK
+    ;;
+  32bit)
+    ;;
+  *)
+    AC_MSG_ERROR([invalid scalar implementation selected])
+    ;;
+  esac
+fi
+
+if test x"$req_bignum" = x"auto"; then
+  SECP_GMP_CHECK
+  if test x"$has_gmp" = x"yes"; then
+    set_bignum=gmp
+  fi
+
+  if test x"$set_bignum" = x; then
+    AC_MSG_ERROR([no working bignum implementation found])
+  fi
+else
+  set_bignum=$req_bignum
+  case $set_bignum in
+  gmp)
+    SECP_GMP_CHECK
+    ;;
+  openssl)
+    SECP_OPENSSL_CHECK
+    ;;
+  *)
+    AC_MSG_ERROR([invalid bignum implementation selection])
+    ;;
+  esac
+fi
+
+# select field implementation
+case $set_field in
+64bit_asm)
+  AC_DEFINE(USE_FIELD_5X52_ASM, 1, [Define this symbol to use the assembly version for the 5x52 field implementation])
+  AC_DEFINE(USE_FIELD_5X52, 1, [Define this symbol to use the FIELD_5X52 implementation])
+  ;;
+64bit)
+  AC_DEFINE(USE_FIELD_5X52_INT128, 1, [Define this symbol to use the __int128 version for the 5x52 field implementation])
+  AC_DEFINE(USE_FIELD_5X52, 1, [Define this symbol to use the FIELD_5X52 implementation])
+  ;;
+gmp)
+  AC_DEFINE(HAVE_LIBGMP,1,[Define this symbol if libgmp is installed])
+  AC_DEFINE(USE_FIELD_GMP, 1, [Define this symbol to use the FIELD_GMP implementation])
+  ;;
+32bit)
+  AC_DEFINE(USE_FIELD_10X26, 1, [Define this symbol to use the FIELD_10X26 implementation])
+  ;;
+*)
+  AC_MSG_ERROR([invalid field implementation])
+  ;;
+esac
+
+# select bignum implementation
+case $set_bignum in
+gmp)
+  AC_DEFINE(HAVE_LIBGMP,1,[Define this symbol if libgmp is installed])
+  AC_DEFINE(USE_NUM_GMP, 1, [Define this symbol to use the gmp implementation])
+  AC_DEFINE(USE_FIELD_INV_NUM, 1, [Define this symbol to use the USE_FIELD_INV_NUM implementation])
+  ;;
+*)
+  AC_MSG_ERROR([invalid bignum implementation])
+  ;;
+esac
+
+#select scalar implementation
+case $set_scalar in
+64bit)
+  AC_DEFINE(USE_SCALAR_4X64, 1, [Define this symbol to use the 4x64 scalar implementation])
+  ;;
+32bit)
+  AC_DEFINE(USE_SCALAR_8X32, 1, [Define this symbol to use the 8x32 scalar implementation])
+  ;;
+*)
+  AC_MSG_ERROR([invalid scalar implementation])
+  ;;
+esac
+
+if test x"$use_tests" = x"yes"; then
+  SECP_OPENSSL_CHECK
+  if test x"$has_openssl_ec" = x"yes"; then
+    AC_DEFINE(ENABLE_OPENSSL_TESTS, 1, [Define this symbol if OpenSSL EC functions are available])
+    SECP_TEST_INCLUDES="$SSL_CFLAGS $CRYPTO_CFLAGS"
+    SECP_TEST_LIBS="$CRYPTO_LIBS"
+
+    case $host in
+    *mingw*)
+      SECP_TEST_LIBS="$SECP_TEST_LIBS -lgdi32"
+      ;;
+    esac
+
+  fi
+fi
+
+if test x"$set_field" = x"gmp" || test x"$set_bignum" = x"gmp"; then
+  SECP_LIBS="$SECP_LIBS $GMP_LIBS"
+fi
+
+if test x"$use_endomorphism" = x"yes"; then
+  AC_DEFINE(USE_ENDOMORPHISM, 1, [Define this symbol to use endomorphism])
+fi
+
+AC_MSG_NOTICE([Using field implementation: $set_field])
+AC_MSG_NOTICE([Using bignum implementation: $set_bignum])
+AC_MSG_NOTICE([Using scalar implementation: $set_scalar])
+
+AC_CONFIG_HEADERS([src/libsecp256k1-config.h])
+AC_CONFIG_FILES([Makefile libsecp256k1.pc])
+AC_SUBST(SECP_INCLUDES)
+AC_SUBST(SECP_LIBS)
+AC_SUBST(SECP_TEST_LIBS)
+AC_SUBST(SECP_TEST_INCLUDES)
+AC_SUBST(YASM_BINFMT)
+AM_CONDITIONAL([USE_ASM], [test x"$set_field" = x"64bit_asm"])
+AM_CONDITIONAL([USE_TESTS], [test x"$use_tests" != x"no"])
+AM_CONDITIONAL([USE_BENCHMARK], [test x"$use_benchmark" != x"no"])
+AC_OUTPUT
diff --git a/include/secp256k1.h b/include/secp256k1.h
new file mode 100644
index 0000000000..932bf0279f
--- /dev/null
+++ b/include/secp256k1.h
@@ -0,0 +1,252 @@
+#ifndef _SECP256K1_
+# define _SECP256K1_
+
+# ifdef __cplusplus
+extern "C" {
+# endif
+
+# if !defined(SECP256K1_GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define SECP256K1_GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define SECP256K1_GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+#  if SECP256K1_GNUC_PREREQ(3,0)
+#   define SECP256K1_RESTRICT __restrict__
+#  elif (defined(_MSC_VER) && _MSC_VER >= 1400)
+#   define SECP256K1_RESTRICT __restrict
+#  else
+#   define SECP256K1_RESTRICT
+#  endif
+# else
+#  define SECP256K1_RESTRICT restrict
+# endif
+
+# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+#  if SECP256K1_GNUC_PREREQ(2,7)
+#   define SECP256K1_INLINE __inline__
+#  elif (defined(_MSC_VER))
+#   define SECP256K1_INLINE __inline
+#  else
+#   define SECP256K1_INLINE
+#  endif
+# else
+#  define SECP256K1_INLINE inline
+# endif
+
+/**Warning attributes
+  * NONNULL is not used if SECP256K1_BUILD is set to avoid the compiler optimizing out
+  * some paranoid null checks. */
+# if defined(__GNUC__) && SECP256K1_GNUC_PREREQ(3, 4)
+#  define SECP256K1_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
+# else
+#  define SECP256K1_WARN_UNUSED_RESULT
+# endif
+# if !defined(SECP256K1_BUILD) && defined(__GNUC__) && SECP256K1_GNUC_PREREQ(3, 4)
+#  define SECP256K1_ARG_NONNULL(_x)  __attribute__ ((__nonnull__(_x)))
+# else
+#  define SECP256K1_ARG_NONNULL(_x)
+# endif
+
+
+/** Flags to pass to secp256k1_start. */
+# define SECP256K1_START_VERIFY (1 << 0)
+# define SECP256K1_START_SIGN   (1 << 1)
+
+/** Initialize the library. This may take some time (10-100 ms).
+ *  You need to call this before calling any other function.
+ *  It cannot run in parallel with any other functions, but once
+ *  secp256k1_start() returns, all other functions are thread-safe.
+ */
+void secp256k1_start(unsigned int flags);
+
+/** Free all memory associated with this library. After this, no
+ *  functions can be called anymore, except secp256k1_start()
+ */
+void secp256k1_stop(void);
+
+/** Verify an ECDSA signature.
+ *  Returns: 1: correct signature
+ *           0: incorrect signature
+ *          -1: invalid public key
+ *          -2: invalid signature
+ * In:       msg:       the message being verified (cannot be NULL)
+ *           msglen:    the length of the message (at most 32)
+ *           sig:       the signature being verified (cannot be NULL)
+ *           siglen:    the length of the signature
+ *           pubkey:    the public key to verify with (cannot be NULL)
+ *           pubkeylen: the length of pubkey
+ * Requires starting using SECP256K1_START_VERIFY.
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify(
+  const unsigned char *msg,
+  int msglen,
+  const unsigned char *sig,
+  int siglen,
+  const unsigned char *pubkey,
+  int pubkeylen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(5);
+
+/** Create an ECDSA signature.
+ *  Returns: 1: signature created
+ *           0: nonce invalid, try another one
+ *  In:      msg:    the message being signed (cannot be NULL)
+ *           msglen: the length of the message being signed (at most 32)
+ *           seckey: pointer to a 32-byte secret key (cannot be NULL, assumed to be valid)
+ *           nonce:  pointer to a 32-byte nonce (cannot be NULL, generated with a cryptographic PRNG)
+ *  Out:     sig:    pointer to an array where the signature will be placed (cannot be NULL)
+ *  In/Out:  siglen: pointer to an int with the length of sig, which will be updated
+ *                   to contain the actual signature length (<=72).
+ * Requires starting using SECP256K1_START_SIGN.
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign(
+  const unsigned char *msg,
+  int msglen,
+  unsigned char *sig,
+  int *siglen,
+  const unsigned char *seckey,
+  const unsigned char *nonce
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5) SECP256K1_ARG_NONNULL(6);
+
+/** Create a compact ECDSA signature (64 byte + recovery id).
+ *  Returns: 1: signature created
+ *           0: nonce invalid, try another one
+ *  In:      msg:    the message being signed (cannot be NULL)
+ *           msglen: the length of the message being signed (at most 32)
+ *           seckey: pointer to a 32-byte secret key (cannot be NULL, assumed to be valid)
+ *           nonce:  pointer to a 32-byte nonce (cannot be NULL, generated with a cryptographic PRNG)
+ *  Out:     sig64:  pointer to a 64-byte array where the signature will be placed (cannot be NULL)
+ *           recid:  pointer to an int, which will be updated to contain the recovery id (can be NULL)
+ * Requires starting using SECP256K1_START_SIGN.
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign_compact(
+  const unsigned char *msg,
+  int msglen,
+  unsigned char *sig64,
+  const unsigned char *seckey,
+  const unsigned char *nonce,
+  int *recid
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5);
+
+/** Recover an ECDSA public key from a compact signature.
+ *  Returns: 1: public key successfully recovered (which guarantees a correct signature).
+ *           0: otherwise.
+ *  In:      msg:        the message assumed to be signed (cannot be NULL)
+ *           msglen:     the length of the message (at most 32)
+ *           sig64:      signature as 64 byte array (cannot be NULL)
+ *           compressed: whether to recover a compressed or uncompressed pubkey
+ *           recid:      the recovery id (0-3, as returned by ecdsa_sign_compact)
+ *  Out:     pubkey:     pointer to a 33 or 65 byte array to put the pubkey (cannot be NULL)
+ *           pubkeylen:  pointer to an int that will contain the pubkey length (cannot be NULL)
+ * Requires starting using SECP256K1_START_VERIFY.
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_recover_compact(
+  const unsigned char *msg,
+  int msglen,
+  const unsigned char *sig64,
+  unsigned char *pubkey,
+  int *pubkeylen,
+  int compressed,
+  int recid
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5);
+
+/** Verify an ECDSA secret key.
+ *  Returns: 1: secret key is valid
+ *           0: secret key is invalid
+ *  In:      seckey: pointer to a 32-byte secret key (cannot be NULL)
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_seckey_verify(const unsigned char *seckey) SECP256K1_ARG_NONNULL(1);
+
+/** Just validate a public key.
+ *  Returns: 1: valid public key
+ *           0: invalid public key
+ *  In:      pubkey:    pointer to a 33-byte or 65-byte public key (cannot be NULL).
+ *           pubkeylen: length of pubkey
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_verify(const unsigned char *pubkey, int pubkeylen) SECP256K1_ARG_NONNULL(1);
+
+/** Compute the public key for a secret key.
+ *  In:     compressed: whether the computed public key should be compressed
+ *          seckey:     pointer to a 32-byte private key (cannot be NULL)
+ *  Out:    pubkey:     pointer to a 33-byte (if compressed) or 65-byte (if uncompressed)
+ *                      area to store the public key (cannot be NULL)
+ *          pubkeylen:  pointer to int that will be updated to contain the pubkey's
+ *                      length (cannot be NULL)
+ *  Returns: 1: secret was valid, public key stored
+ *           0: secret was invalid, try again.
+ * Requires starting using SECP256K1_START_SIGN.
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_create(
+  unsigned char *pubkey,
+  int *pubkeylen,
+  const unsigned char *seckey,
+  int compressed
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Decompress a public key.
+ * In/Out: pubkey:    pointer to a 65-byte array to put the decompressed public key.
+ *                    It must contain a 33-byte or 65-byte public key already (cannot be NULL)
+ *         pubkeylen: pointer to the size of the public key pointed to by pubkey (cannot be NULL)
+ *                    It will be updated to reflect the new size.
+ * Returns: 0 if the passed public key was invalid, 1 otherwise. If 1 is returned, the
+ *          pubkey is replaced with its decompressed version.
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_decompress(
+  unsigned char *pubkey,
+  int *pubkeylen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Export a private key in DER format. */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_export(
+  const unsigned char *seckey,
+  unsigned char *privkey,
+  int *privkeylen,
+  int compressed
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3);
+
+/** Import a private key in DER format. */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_import(
+  unsigned char *seckey,
+  const unsigned char *privkey,
+  int privkeylen
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Tweak a private key by adding tweak to it. */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_tweak_add(
+  unsigned char *seckey,
+  const unsigned char *tweak
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Tweak a public key by adding tweak times the generator to it.
+ * Requires starting with SECP256K1_START_VERIFY.
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_tweak_add(
+  unsigned char *pubkey,
+  int pubkeylen,
+  const unsigned char *tweak
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3);
+
+/** Tweak a private key by multiplying it with tweak. */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_privkey_tweak_mul(
+  unsigned char *seckey,
+  const unsigned char *tweak
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2);
+
+/** Tweak a public key by multiplying it with tweak.
+ * Requires starting with SECP256K1_START_VERIFY.
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ec_pubkey_tweak_mul(
+  unsigned char *pubkey,
+  int pubkeylen,
+  const unsigned char *tweak
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3);
+
+# ifdef __cplusplus
+}
+# endif
+
+#endif
diff --git a/libsecp256k1.pc.in b/libsecp256k1.pc.in
new file mode 100644
index 0000000000..1c72dd0003
--- /dev/null
+++ b/libsecp256k1.pc.in
@@ -0,0 +1,13 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libsecp256k1
+Description: Optimized C library for EC operations on curve secp256k1
+URL: https://github.com/bitcoin/secp256k1
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}
+Libs.private: @SECP_LIBS@
+Libs: -L${libdir} -lsecp256k1
+
diff --git a/nasm_lt.sh b/nasm_lt.sh
new file mode 100755
index 0000000000..6cd73294c0
--- /dev/null
+++ b/nasm_lt.sh
@@ -0,0 +1,57 @@
+#! /bin/sh
+command=""    # command line being assembled from the translated arguments
+infile=""     # last *.asm argument seen; used to derive a default output name
+o_opt=no      # becomes "yes" once an -o option has been seen
+pic=no        # becomes "yes" once -DPIC has been appended
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -DPIC|-fPIC|-fpic|-Kpic|-KPIC)    # any PIC flag becomes a single -DPIC
+            if [ "$pic" != "yes" ] ; then
+                command="$command -DPIC"
+                pic=yes
+            fi
+            ;;
+        -f|-fbin|-faout|-faoutb|-fcoff|-felf|-felf64|-fas86| \
+        -fobj|-fwin32|-fwin64|-frdf|-fieee|-fmacho|-fmacho64)
+            # it's a file format specifier for nasm.
+            command="$command $1"
+            ;;
+        -f*)
+            # maybe a code-generation flag for gcc; it is dropped, not passed on.
+            ;;
+        -[Ii]*)    # include directory, either attached (-Idir) or given as the next argument
+            incdir=`echo "$1" | sed 's/^-[Ii]//'`
+            if [ "x$incdir" = x -a "x$2" != x ] ; then
+                case "$2" in
+                    -*) ;;    # next argument is another option, not a directory
+                    *) incdir="$2"; shift;;
+                esac
+            fi
+            if [ "x$incdir" != x ] ; then
+                # In the case of NASM, the trailing slash is necessary.
+                incdir=`echo "$incdir" | sed 's%/*$%/%'`
+                command="$command -I$incdir"
+            fi
+            ;;
+        -o*)    # explicit output option; passed through unchanged
+            o_opt=yes
+            command="$command $1"
+            ;;
+        *.asm)    # assembly input; passed through and remembered in infile
+            infile=$1
+            command="$command $1"
+            ;;
+        *)    # everything else is passed through unchanged
+            command="$command $1"
+            ;;
+    esac
+    shift
+done
+if [ "$o_opt" != yes ] ; then
+    # By default, NASM creates an output file
+    # in the same directory as the input file.
+    outfile="-o `echo $infile | sed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.o"
+    command="$command $outfile"    # input name without directory, extension replaced by .o
+fi
+echo $command    # show the final command line before running it
+exec $command    # deliberately unquoted: $command is split back into separate arguments
diff --git a/obj/.gitignore b/obj/.gitignore
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/bench_inv.c b/src/bench_inv.c
new file mode 100644
index 0000000000..d6f664333f
--- /dev/null
+++ b/src/bench_inv.c
@@ -0,0 +1,41 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+#include <stdio.h>
+#include <string.h>
+
+#include "include/secp256k1.h"
+#include "util.h"
+#include "num_impl.h"
+#include "field_impl.h"
+#include "group_impl.h"
+#include "scalar_impl.h"
+
+int main(void) {
+    static const unsigned char init[32] = { /* seed: start value and per-round addend */
+        0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
+        0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
+        0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
+        0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
+    };
+    static const unsigned char fini[32] = { /* expected bytes after the last round */
+        0xba, 0x28, 0x58, 0xd8, 0xaa, 0x11, 0xd6, 0xf2,
+        0xfa, 0xce, 0x50, 0xb1, 0x67, 0x19, 0xb1, 0xa6,
+        0xe0, 0xaa, 0x84, 0x53, 0xf6, 0x80, 0xfc, 0x23,
+        0x88, 0x3c, 0xd6, 0x74, 0x9f, 0x27, 0x09, 0x03
+    };
+    secp256k1_scalar_t step, acc;
+    unsigned char out[32];
+    secp256k1_ge_start();
+    secp256k1_scalar_set_b32(&step, init, NULL);
+    secp256k1_scalar_set_b32(&acc, init, NULL);
+    for (int left = 1000000; left > 0; left--) { /* acc = 1/acc + step */
+        secp256k1_scalar_inverse(&acc, &acc);
+        secp256k1_scalar_add(&acc, &acc, &step);
+    }
+    secp256k1_scalar_get_b32(out, &acc);
+    CHECK(memcmp(out, fini, sizeof(out)) == 0);
+    return 0;
+}
diff --git a/src/bench_sign.c b/src/bench_sign.c
new file mode 100644
index 0000000000..f01f11d689
--- /dev/null
+++ b/src/bench_sign.c
@@ -0,0 +1,49 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+#include <stdio.h>
+#include <string.h>
+
+#include "include/secp256k1.h"
+#include "util.h"
+
+int main(void) {
+    /* Signature expected after the final round of the chained loop below. */
+    static const unsigned char fini[64] = {
+        0x92, 0x03, 0xef, 0xf1, 0x58, 0x0b, 0x49, 0x8d,
+        0x22, 0x3d, 0x49, 0x0e, 0xbf, 0x26, 0x50, 0x0e,
+        0x2d, 0x62, 0x90, 0xd7, 0x82, 0xbd, 0x3d, 0x5c,
+        0xa9, 0x10, 0xa5, 0x49, 0xb1, 0xd8, 0x8c, 0xc0,
+        0x5b, 0x5e, 0x9e, 0x68, 0x51, 0x3d, 0xe8, 0xec,
+        0x82, 0x30, 0x82, 0x88, 0x8c, 0xfd, 0xe7, 0x71,
+        0x15, 0x92, 0xfc, 0x14, 0x59, 0x78, 0x31, 0xb3,
+        0xf6, 0x07, 0x91, 0x18, 0x00, 0x8d, 0x4c, 0xb2
+    };
+    unsigned char msg[32], nonce[32], key[32];
+    unsigned char sig[64];
+
+    secp256k1_start(SECP256K1_START_SIGN);
+
+    /* Deterministic inputs: msg = 1..32, nonce = 33..64, key = 65..96. */
+    for (int k = 0; k < 32; k++) {
+        msg[k] = k + 1;
+        nonce[k] = k + 33;
+        key[k] = k + 65;
+    }
+
+    /* Each round signs, then feeds its inputs and outputs into the next round. */
+    for (int round = 0; round < 1000000; round++) {
+        int recid = 0;
+        CHECK(secp256k1_ecdsa_sign_compact(msg, 32, sig, key, nonce, &recid));
+        memcpy(nonce, key, 32);      /* Move former key to nonce  */
+        memcpy(msg, sig, 32);        /* Move former R to message. */
+        memcpy(key, sig + 32, 32);   /* Move former S to key.     */
+    }
+
+    CHECK(memcmp(sig, fini, 64) == 0);
+
+    secp256k1_stop();
+    return 0;
+}
diff --git a/src/bench_verify.c b/src/bench_verify.c
new file mode 100644
index 0000000000..690595516d
--- /dev/null
+++ b/src/bench_verify.c
@@ -0,0 +1,44 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+
+#include "include/secp256k1.h"
+#include "util.h"
+
+int main(void) {
+    /* 33-byte public key expected from the final round of the chained loop below. */
+    static const unsigned char fini[33] = {
+        0x02,
+        0x52, 0x63, 0xae, 0x9a, 0x9d, 0x47, 0x1f, 0x1a,
+        0xb2, 0x36, 0x65, 0x89, 0x11, 0xe7, 0xcc, 0x86,
+        0xa3, 0xab, 0x97, 0xb6, 0xf1, 0xaf, 0xfd, 0x8f,
+        0x9b, 0x38, 0xb6, 0x18, 0x55, 0xe5, 0xc2, 0x43
+    };
+    unsigned char msg[32], sig[64], pubkey[33];
+
+    secp256k1_start(SECP256K1_START_VERIFY);
+
+    /* Deterministic inputs: msg = 1..32, sig = 65..128. */
+    for (int k = 0; k < 64; k++) {
+        if (k < 32) msg[k] = 1 + k;
+        sig[k] = 65 + k;
+    }
+
+    for (int round = 0; round < 1000000; round++) {
+        int pubkeylen = 33;
+        CHECK(secp256k1_ecdsa_recover_compact(msg, 32, sig, pubkey, &pubkeylen, 1, round % 2));
+        memcpy(sig + 32, msg, 32);      /* Move former message to S. */
+        memcpy(msg, sig, 32);           /* Move former R to message. */
+        memcpy(sig, pubkey + 1, 32);    /* Move recovered pubkey X coordinate to R (which must be a valid X coordinate). */
+    }
+
+    CHECK(memcmp(fini, pubkey, 33) == 0);
+
+    secp256k1_stop();
+    return 0;
+}
diff --git a/src/ecdsa.h b/src/ecdsa.h
new file mode 100644
index 0000000000..3b1e0484ea
--- /dev/null
+++ b/src/ecdsa.h
@@ -0,0 +1,23 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECDSA_
+#define _SECP256K1_ECDSA_
+
+#include "num.h"
+
+typedef struct {
+    secp256k1_num_t r, s; /* the two integers (r, s) that make up an ECDSA signature */
+} secp256k1_ecdsa_sig_t;
+
+static int secp256k1_ecdsa_sig_parse(secp256k1_ecdsa_sig_t *r, const unsigned char *sig, int size);
+static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const secp256k1_ecdsa_sig_t *a);
+static int secp256k1_ecdsa_sig_verify(const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_num_t *message);
+static int secp256k1_ecdsa_sig_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *seckey, const secp256k1_scalar_t *message, const secp256k1_scalar_t *nonce, int *recid);
+static int secp256k1_ecdsa_sig_recover(const secp256k1_ecdsa_sig_t *sig, secp256k1_ge_t *pubkey, const secp256k1_num_t *message, int recid);
+static void secp256k1_ecdsa_sig_set_rs(secp256k1_ecdsa_sig_t *sig, const secp256k1_num_t *r, const secp256k1_num_t *s);
+
+#endif
diff --git a/src/ecdsa_impl.h b/src/ecdsa_impl.h
new file mode 100644
index 0000000000..4c05ec39f8
--- /dev/null
+++ b/src/ecdsa_impl.h
@@ -0,0 +1,183 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+
+#ifndef _SECP256K1_ECDSA_IMPL_H_
+#define _SECP256K1_ECDSA_IMPL_H_
+
+#include "num.h"
+#include "field.h"
+#include "group.h"
+#include "ecmult.h"
+#include "ecmult_gen.h"
+#include "ecdsa.h"
+
+/** Parse the bytes 0x30 <len> 0x02 <lenr> <R> 0x02 <lens> <S> into r. Returns 1 on success, 0 on malformed input. */
+static int secp256k1_ecdsa_sig_parse(secp256k1_ecdsa_sig_t *r, const unsigned char *sig, int size) {
+    /* sig[0], sig[2] and sig[3] are read right away, so reject inputs too short to hold them. */
+    if (size < 4 || sig[0] != 0x30 || sig[2] != 0x02) return 0;
+    int lenr = sig[3];
+    /* sig[lenr+4] and sig[lenr+5] must lie inside the buffer before they are read. */
+    if (lenr == 0 || 5+lenr >= size) return 0;
+    if (sig[lenr+4] != 0x02) return 0;
+    int lens = sig[lenr+5];
+    if (lens == 0 || sig[1] != lenr+lens+4) return 0;
+    if (lenr+lens+6 > size) return 0;
+    secp256k1_num_set_bin(&r->r, sig+4, lenr);
+    secp256k1_num_set_bin(&r->s, sig+6+lenr, lens);
+    return 1;
+}
+
+/** Encode a as 0x30 <len> 0x02 <lenR> <R> 0x02 <lenS> <S> into sig.
+ *  On entry *size is the room available in sig; on success it is set to the number of bytes written.
+ *  Returns 1 on success, 0 (nothing written) when the buffer is too small. */
+static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const secp256k1_ecdsa_sig_t *a) {
+    /* Byte length of each integer, plus one byte when it is zero or its top bit is set. */
+    int rlen = (secp256k1_num_bits(&a->r) + 7)/8;
+    if (rlen == 0 || secp256k1_num_get_bit(&a->r, rlen*8-1)) rlen++;
+    int slen = (secp256k1_num_bits(&a->s) + 7)/8;
+    if (slen == 0 || secp256k1_num_get_bit(&a->s, slen*8-1)) slen++;
+    const int total = 6 + slen + rlen;
+    if (*size < total) return 0;
+    *size = total;
+    unsigned char *rhdr = sig + 2, *shdr = sig + 4 + rlen;
+    sig[0] = 0x30; sig[1] = 4 + slen + rlen;
+    rhdr[0] = 0x02; rhdr[1] = rlen;
+    secp256k1_num_get_bin(rhdr + 2, rlen, &a->r);
+    shdr[0] = 0x02; shdr[1] = slen;
+    secp256k1_num_get_bin(shdr + 2, slen, &a->s);
+    return 1;
+}
+
+/** Recompute into r2 the value sig->r must equal for sig to be valid for (pubkey, message). Returns 1 on success,
+ *  0 (r2 untouched) if r or s is negative, zero or not below the order, or the combined point is infinity. */
+static int secp256k1_ecdsa_sig_recompute(secp256k1_num_t *r2, const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_num_t *message) {
+    const secp256k1_ge_consts_t *gc = secp256k1_ge_consts;
+    /* Range checks, first on r and then on s. */
+    if (secp256k1_num_is_neg(&sig->r) || secp256k1_num_is_zero(&sig->r) || secp256k1_num_cmp(&sig->r, &gc->order) >= 0)
+        return 0;
+    if (secp256k1_num_is_neg(&sig->s) || secp256k1_num_is_zero(&sig->s) || secp256k1_num_cmp(&sig->s, &gc->order) >= 0)
+        return 0;
+    secp256k1_num_t sinv, u1, u2;
+    secp256k1_num_init(&sinv);
+    secp256k1_num_init(&u1);
+    secp256k1_num_init(&u2);
+    /* u1 = message/s and u2 = r/s (mod order); the candidate point is u2*pubkey + u1*G. */
+    secp256k1_num_mod_inverse(&sinv, &sig->s, &gc->order);
+    secp256k1_num_mod_mul(&u1, &sinv, message, &gc->order);
+    secp256k1_num_mod_mul(&u2, &sinv, &sig->r, &gc->order);
+    secp256k1_gej_t keyj, pt; secp256k1_gej_set_ge(&keyj, pubkey);
+    secp256k1_ecmult(&pt, &keyj, &u2, &u1);
+    const int ok = !secp256k1_gej_is_infinity(&pt);
+    if (ok) {
+        secp256k1_fe_t x; unsigned char xbytes[32];
+        secp256k1_gej_get_x_var(&x, &pt);
+        secp256k1_fe_normalize(&x);
+        secp256k1_fe_get_b32(xbytes, &x);
+        secp256k1_num_set_bin(r2, xbytes, 32);
+        secp256k1_num_mod(r2, &gc->order); /* x coordinate reduced modulo the order */
+    }
+    secp256k1_num_free(&sinv);
+    secp256k1_num_free(&u1);
+    secp256k1_num_free(&u2);
+    return ok;
+}
+
+/** Recover the public key from sig and message. recid bit 0 is the parity flag handed to secp256k1_ge_set_xo;
+ *  bit 1 means the point's x coordinate is sig->r plus the group order. Returns 1 on success, 0 otherwise. */
+static int secp256k1_ecdsa_sig_recover(const secp256k1_ecdsa_sig_t *sig, secp256k1_ge_t *pubkey, const secp256k1_num_t *message, int recid) {
+    const secp256k1_ge_consts_t *c = secp256k1_ge_consts;
+    if (secp256k1_num_is_neg(&sig->r) || secp256k1_num_is_neg(&sig->s))
+        return 0;
+    if (secp256k1_num_is_zero(&sig->r) || secp256k1_num_is_zero(&sig->s))
+        return 0;
+    if (secp256k1_num_cmp(&sig->r, &c->order) >= 0 || secp256k1_num_cmp(&sig->s, &c->order) >= 0)
+        return 0;
+    secp256k1_num_t rx;
+    secp256k1_num_init(&rx);
+    secp256k1_num_copy(&rx, &sig->r);
+    if (recid & 2) {
+        secp256k1_num_add(&rx, &rx, &c->order);
+        if (secp256k1_num_cmp(&rx, &secp256k1_fe_consts->p) >= 0) {
+            secp256k1_num_free(&rx); /* rx must be released on every exit path */
+            return 0;
+        }
+    }
+    unsigned char brx[32];
+    secp256k1_num_get_bin(brx, 32, &rx);
+    secp256k1_num_free(&rx);
+    secp256k1_fe_t fx; secp256k1_fe_set_b32(&fx, brx);
+    secp256k1_ge_t x;
+    if (!secp256k1_ge_set_xo(&x, &fx, recid & 1))
+        return 0;
+    secp256k1_gej_t xj; secp256k1_gej_set_ge(&xj, &x);
+    secp256k1_num_t rn, u1, u2;
+    secp256k1_num_init(&rn);
+    secp256k1_num_init(&u1);
+    secp256k1_num_init(&u2);
+    /* pubkey = (s/r)*X - (message/r)*G, with the scalars taken modulo the group order. */
+    secp256k1_num_mod_inverse(&rn, &sig->r, &c->order);
+    secp256k1_num_mod_mul(&u1, &rn, message, &c->order);
+    secp256k1_num_sub(&u1, &c->order, &u1);
+    secp256k1_num_mod_mul(&u2, &rn, &sig->s, &c->order);
+    secp256k1_gej_t qj; secp256k1_ecmult(&qj, &xj, &u2, &u1);
+    secp256k1_ge_set_gej_var(pubkey, &qj);
+    secp256k1_num_free(&rn);
+    secp256k1_num_free(&u1);
+    secp256k1_num_free(&u2);
+    return !secp256k1_gej_is_infinity(&qj);
+}
+
+/** Return 1 if the value recomputed from (sig, pubkey, message) equals sig->r, otherwise 0. */
+static int secp256k1_ecdsa_sig_verify(const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_num_t *message) {
+    secp256k1_num_t expect;
+    secp256k1_num_init(&expect);
+    const int valid = secp256k1_ecdsa_sig_recompute(&expect, sig, pubkey, message) && secp256k1_num_cmp(&sig->r, &expect) == 0;
+    secp256k1_num_free(&expect);
+    return valid;
+}
+
+/** Sign message with seckey and the caller-supplied nonce, storing (r, s) in sig; s is negated when it is "high".
+ *  If recid is non-NULL it receives the recovery id. Returns 1 on success, 0 if r or s comes out zero. */
+static int secp256k1_ecdsa_sig_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *seckey, const secp256k1_scalar_t *message, const secp256k1_scalar_t *nonce, int *recid) {
+    secp256k1_gej_t rp;
+    secp256k1_ecmult_gen(&rp, nonce);
+    secp256k1_ge_t r; secp256k1_ge_set_gej(&r, &rp);
+    unsigned char b[32];
+    secp256k1_fe_normalize(&r.x);
+    secp256k1_fe_normalize(&r.y);
+    secp256k1_fe_get_b32(b, &r.x);
+    int overflow = 0;
+    secp256k1_scalar_t sigr, n, sigs;
+    secp256k1_scalar_set_b32(&sigr, b, &overflow);
+    if (recid)
+        *recid = (overflow ? 2 : 0) | (secp256k1_fe_is_odd(&r.y) ? 1 : 0);
+    secp256k1_scalar_mul(&n, &sigr, seckey);
+    secp256k1_scalar_add(&n, &n, message);
+    secp256k1_scalar_inverse(&sigs, nonce);
+    secp256k1_scalar_mul(&sigs, &sigs, &n);
+    secp256k1_scalar_clear(&n);
+    secp256k1_gej_clear(&rp);
+    secp256k1_ge_clear(&r);
+    /* A signature with r == 0 or s == 0 is invalid (verification rejects both), so never emit one. */
+    if (secp256k1_scalar_is_zero(&sigr) || secp256k1_scalar_is_zero(&sigs))
+        return 0;
+    if (secp256k1_scalar_is_high(&sigs)) {
+        secp256k1_scalar_negate(&sigs, &sigs);
+        if (recid)
+            *recid ^= 1;
+    }
+    secp256k1_scalar_get_num(&sig->s, &sigs);
+    secp256k1_scalar_get_num(&sig->r, &sigr);
+    return 1;
+}
+
+static void secp256k1_ecdsa_sig_set_rs(secp256k1_ecdsa_sig_t *sig, const secp256k1_num_t *r, const secp256k1_num_t *s) {
+    secp256k1_num_copy(&sig->r, r); /* set the signature's r and s from the given numbers via secp256k1_num_copy */
+    secp256k1_num_copy(&sig->s, s);
+}
+
+#endif
diff --git a/src/eckey.h b/src/eckey.h
new file mode 100644
index 0000000000..024c8b821b
--- /dev/null
+++ b/src/eckey.h
@@ -0,0 +1,25 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECKEY_
+#define _SECP256K1_ECKEY_
+
+#include "group.h"
+#include "scalar.h"
+#include "num.h"
+
+static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size);
+static int secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char *pub, int *size, int compressed);
+
+static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned char *privkey, int privkeylen);
+static int secp256k1_eckey_privkey_serialize(unsigned char *privkey, int *privkeylen, const secp256k1_scalar_t *key, int compressed);
+
+static int secp256k1_eckey_privkey_tweak_add(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak);
+static int secp256k1_eckey_pubkey_tweak_add(secp256k1_ge_t *key, const secp256k1_num_t *tweak);
+static int secp256k1_eckey_privkey_tweak_mul(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak);
+static int secp256k1_eckey_pubkey_tweak_mul(secp256k1_ge_t *key, const secp256k1_num_t *tweak);
+
+#endif
diff --git a/src/eckey_impl.h b/src/eckey_impl.h
new file mode 100644
index 0000000000..290b1f0900
--- /dev/null
+++ b/src/eckey_impl.h
@@ -0,0 +1,200 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECKEY_IMPL_H_
+#define _SECP256K1_ECKEY_IMPL_H_
+
+#include "eckey.h"
+
+#include "num.h"
+#include "field.h"
+#include "group.h"
+#include "ecmult_gen.h"
+
+/** Parse a serialized public key: 33 bytes (prefix 0x02/0x03, then X) or 65 bytes (prefix 0x04/0x06/0x07, then X, Y).
+ *  Returns 0 for any other length or prefix, or when the point is rejected; nonzero on success. */
+static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size) {
+    secp256k1_fe_t x, y;
+    if (size == 33) {
+        if (pub[0] != 0x02 && pub[0] != 0x03) return 0;
+        secp256k1_fe_set_b32(&x, pub+1);
+        return secp256k1_ge_set_xo(elem, &x, pub[0] == 0x03);
+    }
+    if (size != 65 || (pub[0] != 0x04 && pub[0] != 0x06 && pub[0] != 0x07)) return 0;
+    secp256k1_fe_set_b32(&x, pub+1);
+    secp256k1_fe_set_b32(&y, pub+33);
+    secp256k1_ge_set_xy(elem, &x, &y);
+    /* Prefixes 0x06/0x07 additionally state whether Y is even/odd, and that must match. */
+    if (pub[0] != 0x04 && secp256k1_fe_is_odd(&y) != (pub[0] == 0x07)) return 0;
+    return secp256k1_ge_is_valid(elem);
+}
+
+/** Serialize elem into pub as 33 bytes (compressed) or 65 bytes (uncompressed) and store that length in *size.
+ *  elem's coordinates are normalized in place. Returns 0, writing nothing, for the point at infinity; else 1. */
+static int secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char *pub, int *size, int compressed) {
+    if (secp256k1_ge_is_infinity(elem)) return 0;
+    secp256k1_fe_normalize(&elem->x);
+    secp256k1_fe_normalize(&elem->y);
+    secp256k1_fe_get_b32(pub + 1, &elem->x);
+    if (!compressed) {
+        *size = 65;
+        pub[0] = 0x04;
+        secp256k1_fe_get_b32(pub + 33, &elem->y);
+        return 1;
+    }
+    *size = 33;
+    pub[0] = secp256k1_fe_is_odd(&elem->y) ? 0x03 : 0x02;
+    return 1;
+}
+
+/** Parse the start of a DER private key: SEQUENCE header, INTEGER 1, then an OCTET STRING of at most 32 key bytes.
+ *  Returns 1 and sets key on success; 0 on malformed input or when secp256k1_scalar_set_b32 reports overflow. */
+static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned char *privkey, int privkeylen) {
+    /* Track the bytes left as an integer: forming privkey+len from an unvalidated len could
+     * point far past the buffer, which is undefined behavior even if the pointer is only compared. */
+    int left = privkeylen;
+    /* sequence header */
+    if (left < 1 || *privkey != 0x30)
+        return 0;
+    privkey++; left--;
+    /* sequence length constructor */
+    if (left < 1 || !(*privkey & 0x80))
+        return 0;
+    int lenb = *privkey & ~0x80; privkey++; left--;
+    if (lenb < 1 || lenb > 2 || left < lenb)
+        return 0;
+    /* sequence length */
+    int len = privkey[lenb-1] | (lenb > 1 ? privkey[lenb-2] << 8 : 0);
+    privkey += lenb; left -= lenb;
+    if (left < len)
+        return 0;
+    /* sequence element 0: version number (=1) */
+    if (left < 3 || privkey[0] != 0x02 || privkey[1] != 0x01 || privkey[2] != 0x01)
+        return 0;
+    privkey += 3; left -= 3;
+    /* sequence element 1: octet string, up to 32 bytes */
+    if (left < 2 || privkey[0] != 0x04 || privkey[1] > 0x20 || left < 2+privkey[1])
+        return 0;
+    int overflow = 0;
+    unsigned char c[32] = {0};
+    memcpy(c + 32 - privkey[1], privkey + 2, privkey[1]);
+    secp256k1_scalar_set_b32(key, c, &overflow);
+    memset(c, 0, 32);
+    return !overflow;
+}
+
+static int secp256k1_eckey_privkey_serialize(unsigned char *privkey, int *privkeylen, const secp256k1_scalar_t *key, int compressed) { /* Write a fixed DER template + key + public key to privkey; the buffer size is not checked here. */
+    secp256k1_gej_t rp;
+    secp256k1_ecmult_gen(&rp, key); /* rp = key*G, the public key that is appended at the end */
+    secp256k1_ge_t r;
+    secp256k1_ge_set_gej(&r, &rp);
+    if (compressed) {
+        static const unsigned char begin[] = { /* fixed prefix; the 32 key bytes follow it */
+            0x30,0x81,0xD3,0x02,0x01,0x01,0x04,0x20
+        };
+        static const unsigned char middle[] = { /* fixed bytes between the key and the public key */
+            0xA0,0x81,0x85,0x30,0x81,0x82,0x02,0x01,0x01,0x30,0x2C,0x06,0x07,0x2A,0x86,0x48,
+            0xCE,0x3D,0x01,0x01,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F,0x30,0x06,0x04,0x01,0x00,0x04,0x01,0x07,0x04,
+            0x21,0x02,0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,0x55,0xA0,0x62,0x95,0xCE,0x87,
+            0x0B,0x07,0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,0x59,0xF2,0x81,0x5B,0x16,0xF8,
+            0x17,0x98,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFF,0xFF,0xFE,0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,0xBF,0xD2,0x5E,
+            0x8C,0xD0,0x36,0x41,0x41,0x02,0x01,0x01,0xA1,0x24,0x03,0x22,0x00
+        };
+        unsigned char *ptr = privkey;
+        memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin);
+        secp256k1_scalar_get_b32(ptr, key); ptr += 32;
+        memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle);
+        int pubkeylen = 0;
+        if (!secp256k1_eckey_pubkey_serialize(&r, ptr, &pubkeylen, 1)) {
+            return 0; /* public key not serializable; privkey already holds partial output */
+        }
+        ptr += pubkeylen;
+        *privkeylen = ptr - privkey; /* 8 + 32 + 141 + 33 = 214 bytes */
+    } else {
+        static const unsigned char begin[] = { /* fixed prefix; the 32 key bytes follow it */
+            0x30,0x82,0x01,0x13,0x02,0x01,0x01,0x04,0x20
+        };
+        static const unsigned char middle[] = { /* fixed bytes between the key and the public key */
+            0xA0,0x81,0xA5,0x30,0x81,0xA2,0x02,0x01,0x01,0x30,0x2C,0x06,0x07,0x2A,0x86,0x48,
+            0xCE,0x3D,0x01,0x01,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F,0x30,0x06,0x04,0x01,0x00,0x04,0x01,0x07,0x04,
+            0x41,0x04,0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,0x55,0xA0,0x62,0x95,0xCE,0x87,
+            0x0B,0x07,0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,0x59,0xF2,0x81,0x5B,0x16,0xF8,
+            0x17,0x98,0x48,0x3A,0xDA,0x77,0x26,0xA3,0xC4,0x65,0x5D,0xA4,0xFB,0xFC,0x0E,0x11,
+            0x08,0xA8,0xFD,0x17,0xB4,0x48,0xA6,0x85,0x54,0x19,0x9C,0x47,0xD0,0x8F,0xFB,0x10,
+            0xD4,0xB8,0x02,0x21,0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+            0xFF,0xFF,0xFF,0xFF,0xFE,0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,0xBF,0xD2,0x5E,
+            0x8C,0xD0,0x36,0x41,0x41,0x02,0x01,0x01,0xA1,0x44,0x03,0x42,0x00
+        };
+        unsigned char *ptr = privkey;
+        memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin);
+        secp256k1_scalar_get_b32(ptr, key); ptr += 32;
+        memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle);
+        int pubkeylen = 0;
+        if (!secp256k1_eckey_pubkey_serialize(&r, ptr, &pubkeylen, 0)) {
+            return 0; /* public key not serializable; privkey already holds partial output */
+        }
+        ptr += pubkeylen;
+        *privkeylen = ptr - privkey; /* 9 + 32 + 173 + 65 = 279 bytes */
+    }
+    return 1;
+}
+
+/** Add tweak to key in place.
+ *  Returns 1, or 0 when the sum is zero (key has been overwritten in either case). */
+static int secp256k1_eckey_privkey_tweak_add(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak) {
+    secp256k1_scalar_add(key, key, tweak);
+    return !secp256k1_scalar_is_zero(key);
+}
+
+/** Replace key with key + tweak*G. Returns 0 (key unchanged) if tweak is not below the
+ *  group order or the sum is the point at infinity, 1 otherwise. */
+static int secp256k1_eckey_pubkey_tweak_add(secp256k1_ge_t *key, const secp256k1_num_t *tweak) {
+    if (secp256k1_num_cmp(tweak, &secp256k1_ge_consts->order) >= 0) return 0;
+    secp256k1_num_t unit;
+    secp256k1_gej_t sum;
+    secp256k1_num_init(&unit);
+    secp256k1_num_set_int(&unit, 1);
+    secp256k1_gej_set_ge(&sum, key);
+    secp256k1_ecmult(&sum, &sum, &unit, tweak); /* sum = 1*key + tweak*G */
+    secp256k1_num_free(&unit);
+    if (!secp256k1_gej_is_infinity(&sum)) {
+        secp256k1_ge_set_gej(key, &sum);
+        return 1;
+    }
+    return 0;
+}
+
+/** Multiply key by tweak in place. Returns 0 and leaves key untouched when tweak is zero, otherwise 1. */
+static int secp256k1_eckey_privkey_tweak_mul(secp256k1_scalar_t *key, const secp256k1_scalar_t *tweak) {
+    const int usable = !secp256k1_scalar_is_zero(tweak);
+    if (usable)
+        secp256k1_scalar_mul(key, key, tweak);
+    return usable;
+}
+
+/** Replace key with tweak*key. Returns 0 (key unchanged) when tweak is zero or not below the group order, else 1. */
+static int secp256k1_eckey_pubkey_tweak_mul(secp256k1_ge_t *key, const secp256k1_num_t *tweak) {
+    if (secp256k1_num_is_zero(tweak) || secp256k1_num_cmp(tweak, &secp256k1_ge_consts->order) >= 0)
+        return 0;
+
+    secp256k1_num_t none;
+    secp256k1_gej_t prod;
+    secp256k1_num_init(&none);
+    secp256k1_num_set_int(&none, 0);
+    secp256k1_gej_set_ge(&prod, key);
+    /* prod = tweak*key + 0*G */
+    secp256k1_ecmult(&prod, &prod, tweak, &none);
+    secp256k1_num_free(&none);
+    secp256k1_ge_set_gej(key, &prod);
+    return 1;
+}
+
+#endif
diff --git a/src/ecmult.h b/src/ecmult.h
new file mode 100644
index 0000000000..e3cf18b680
--- /dev/null
+++ b/src/ecmult.h
@@ -0,0 +1,19 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECMULT_
+#define _SECP256K1_ECMULT_
+
+#include "num.h"
+#include "group.h"
+
+static void secp256k1_ecmult_start(void);
+static void secp256k1_ecmult_stop(void);
+
+/** Double multiply: R = na*A + ng*G */
+static void secp256k1_ecmult(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_num_t *na, const secp256k1_num_t *ng);
+
+#endif
diff --git a/src/ecmult_gen.h b/src/ecmult_gen.h
new file mode 100644
index 0000000000..42f822f9ce
--- /dev/null
+++ b/src/ecmult_gen.h
@@ -0,0 +1,19 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECMULT_GEN_
+#define _SECP256K1_ECMULT_GEN_
+
+#include "scalar.h"
+#include "group.h"
+
+static void secp256k1_ecmult_gen_start(void);
+static void secp256k1_ecmult_gen_stop(void);
+
+/** Multiply with the generator: R = a*G */
+static void secp256k1_ecmult_gen(secp256k1_gej_t *r, const secp256k1_scalar_t *a);
+
+#endif
diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
new file mode 100644
index 0000000000..07859ab04b
--- /dev/null
+++ b/src/ecmult_gen_impl.h
@@ -0,0 +1,118 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECMULT_GEN_IMPL_H_
+#define _SECP256K1_ECMULT_GEN_IMPL_H_
+
+#include "scalar.h"
+#include "group.h"
+#include "ecmult_gen.h"
+
+typedef struct {
+    /* For accelerating the computation of a*G:
+     * To harden against timing attacks, use the following mechanism:
+     * * Break up the multiplicand into groups of 4 bits, called n_0, n_1, n_2, ..., n_63.
+     * * Compute sum(n_i * 16^i * G + U_i, i=0..63), where:
+     *   * U_i = U * 2^i (for i=0..62)
+     *   * U_i = U * (1-2^63) (for i=63)
+     *   where U is a point with no known corresponding scalar. Note that sum(U_i, i=0..63) = 0.
+     * For each i, and each of the 16 possible values of n_i, (n_i * 16^i * G + U_i) is
+     * precomputed (call it prec(i, n_i)). The formula now becomes sum(prec(i, n_i), i=0..63).
+     * None of the resulting prec group elements have a known scalar, and neither do any of
+     * the intermediate sums while computing a*G.
+     * To make memory access uniform, the bytes of prec(i, n_i) are sliced per value of n_i. */
+    unsigned char prec[64][sizeof(secp256k1_ge_t)][16]; /* prec[j][k][i] = k'th byte of (16^j * i * G + U_j) */
+} secp256k1_ecmult_gen_consts_t;
+
+static const secp256k1_ecmult_gen_consts_t *secp256k1_ecmult_gen_consts = NULL;
+
+static void secp256k1_ecmult_gen_start(void) { /* Build the global table read by secp256k1_ecmult_gen; does nothing if it already exists. */
+    if (secp256k1_ecmult_gen_consts != NULL)
+        return;
+
+    /* Allocate the precomputation table. NOTE(review): the malloc result is not checked for NULL. */
+    secp256k1_ecmult_gen_consts_t *ret = (secp256k1_ecmult_gen_consts_t*)malloc(sizeof(secp256k1_ecmult_gen_consts_t));
+
+    /* get the generator */
+    const secp256k1_ge_t *g = &secp256k1_ge_consts->g;
+    secp256k1_gej_t gj; secp256k1_gej_set_ge(&gj, g);
+
+    /* Construct a group element with no known corresponding scalar (nothing up my sleeve). */
+    secp256k1_gej_t nums_gej;
+    {
+        static const unsigned char nums_b32[32] = "The scalar for this x is unknown";
+        secp256k1_fe_t nums_x;
+        secp256k1_fe_set_b32(&nums_x, nums_b32);
+        secp256k1_ge_t nums_ge;
+        VERIFY_CHECK(secp256k1_ge_set_xo(&nums_ge, &nums_x, 0)); /* NOTE(review): nums_ge is only set if VERIFY_CHECK evaluates its argument in every build - confirm */
+        secp256k1_gej_set_ge(&nums_gej, &nums_ge);
+        /* Add G to make the bits in x uniformly distributed. */
+        secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, g);
+    }
+
+    /* compute prec. */
+    secp256k1_ge_t prec[1024];
+    {
+        secp256k1_gej_t precj[1024]; /* Jacobian versions of prec. */
+        secp256k1_gej_t gbase; gbase = gj; /* 16^j * G */
+        secp256k1_gej_t numsbase; numsbase = nums_gej; /* 2^j * nums. */
+        for (int j=0; j<64; j++) {
+            /* Set precj[j*16 .. j*16+15] to (numsbase, numsbase + gbase, ..., numsbase + 15*gbase). */
+            precj[j*16] = numsbase;
+            for (int i=1; i<16; i++) {
+                secp256k1_gej_add_var(&precj[j*16 + i], &precj[j*16 + i - 1], &gbase);
+            }
+            /* Multiply gbase by 16. */
+            for (int i=0; i<4; i++) {
+                secp256k1_gej_double_var(&gbase, &gbase);
+            }
+            /* Multiply numsbase by 2. */
+            secp256k1_gej_double_var(&numsbase, &numsbase);
+            if (j == 62) {
+                /* For the final window (j == 63), numsbase is (1 - 2^63) * nums instead of 2^63 * nums. */
+                secp256k1_gej_neg(&numsbase, &numsbase);
+                secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej);
+            }
+        }
+        secp256k1_ge_set_all_gej_var(1024, prec, precj);
+    }
+    for (int j=0; j<64; j++) {
+        for (int i=0; i<16; i++) {
+            const unsigned char* raw = (const unsigned char*)(&prec[j*16 + i]); /* raw bytes of entry (j, i) */
+            for (size_t k=0; k<sizeof(secp256k1_ge_t); k++)
+                ret->prec[j][k][i] = raw[k]; /* byte k of all 16 entries of window j end up adjacent */
+        }
+    }
+
+    /* Set the global pointer to the precomputation table. */
+    secp256k1_ecmult_gen_consts = ret;
+}
+
+/** Free the table built by secp256k1_ecmult_gen_start and reset the global pointer; does nothing if it is unset. */
+static void secp256k1_ecmult_gen_stop(void) {
+    secp256k1_ecmult_gen_consts_t *table = (secp256k1_ecmult_gen_consts_t*)secp256k1_ecmult_gen_consts;
+    if (table != NULL) {
+        secp256k1_ecmult_gen_consts = NULL;
+        free(table);
+    }
+}
+
+/** r = gn*G: for each of the 64 four-bit windows of gn, add the table entry prec[window][.][value] to r. */
+static void secp256k1_ecmult_gen(secp256k1_gej_t *r, const secp256k1_scalar_t *gn) {
+    const secp256k1_ecmult_gen_consts_t *tbl = secp256k1_ecmult_gen_consts;
+    secp256k1_ge_t entry;
+    unsigned char *dst = (unsigned char*)&entry;
+    int nibble;
+    secp256k1_gej_set_infinity(r);
+    for (int win = 0; win < 64; win++) {
+        nibble = secp256k1_scalar_get_bits(gn, win * 4, 4);
+        for (size_t b = 0; b < sizeof(entry); b++) dst[b] = tbl->prec[win][b][nibble];
+        secp256k1_gej_add_ge(r, r, &entry);
+    }
+    nibble = 0; secp256k1_ge_clear(&entry);
+}
+
+#endif
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
new file mode 100644
index 0000000000..508902564e
--- /dev/null
+++ b/src/ecmult_impl.h
@@ -0,0 +1,222 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECMULT_IMPL_H_
+#define _SECP256K1_ECMULT_IMPL_H_
+
+#include "num.h"
+#include "group.h"
+#include "ecmult.h"
+
+/* optimal for 128-bit and 256-bit exponents. */
+#define WINDOW_A 5
+
+/** larger numbers may result in slightly better performance, at the cost of
+    exponentially larger precomputed tables. WINDOW_G == 14 results in 640 KiB. */
+#define WINDOW_G 14
+
+/** Fill a table 'pre' with precomputed odd multiples of a. w determines the size of the table.
+ *  pre will contain the values [1*a,3*a,5*a,...,(2^(w-1)-1)*a], so it needs room for
+ *  2^(w-2) entries.
+ *
+ *  There are two versions of this function:
+ *  - secp256k1_ecmult_table_precomp_gej_var, which operates on group elements in jacobian notation,
+ *    fast to precompute, but slower to use in later additions.
+ *  - secp256k1_ecmult_table_precomp_ge_var, which operates on group elements in affine notation,
+ *    (much) slower to precompute, but a bit faster to use in later additions.
+ *  To compute a*P + b*G, we use the jacobian version for P, and the affine version for G, as
+ *  G is constant, so it only needs to be done once in advance.
+ */
+static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *pre, const secp256k1_gej_t *a, int w) {
+    const int entries = 1 << (w-2); /* pre[k] receives (2*k+1)*a */
+    secp256k1_gej_t twice_a; pre[0] = *a; secp256k1_gej_double_var(&twice_a, &pre[0]);
+    for (int k=0; k+1<entries; k++)
+        secp256k1_gej_add_var(&pre[k+1], &twice_a, &pre[k]);
+}
+
+static void secp256k1_ecmult_table_precomp_ge_var(secp256k1_ge_t *pre, const secp256k1_gej_t *a, int w) {
+    const int table_size = 1 << (w-2);
+    /* Heap-allocated scratch table: WINDOW_G == 14 means 4096 jacobian entries, too many for a stack VLA. */
+    secp256k1_gej_t *prej = (secp256k1_gej_t*)malloc(sizeof(secp256k1_gej_t) * table_size);
+    if (prej == NULL) abort(); /* void return: no way to report failure, so stop instead of writing through NULL */
+    prej[0] = *a; secp256k1_gej_t d; secp256k1_gej_double_var(&d, a);
+    for (int i=1; i<table_size; i++) secp256k1_gej_add_var(&prej[i], &d, &prej[i-1]); /* prej[i] = prej[i-1] + 2*a */
+    secp256k1_ge_set_all_gej_var(table_size, pre, prej);
+    free(prej);
+}
+
+/** The number of entries a table with precomputed multiples needs to have. */
+#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
+
+/** The following macros retrieve a particular odd multiple from a table of precomputed
+ *  multiples: r = n*P for odd n, |n| < 2^(w-1), applying neg to the entry for -n when n < 0. */
+#define ECMULT_TABLE_GET(r,pre,n,w,neg) do { \
+    VERIFY_CHECK(((n) & 1) == 1); \
+    VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
+    VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
+    if ((n) > 0) \
+        *(r) = (pre)[((n)-1)/2]; \
+    else \
+        (neg)((r), &(pre)[(-(n)-1)/2]); \
+} while(0)
+
+#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_gej_neg)
+#define ECMULT_TABLE_GET_GE(r,pre,n,w)  ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_ge_neg)
+
+typedef struct {
+    /* For accelerating the computation of a*P + b*G: */
+    secp256k1_ge_t pre_g[ECMULT_TABLE_SIZE(WINDOW_G)];    /* odd multiples of the generator */
+    secp256k1_ge_t pre_g_128[ECMULT_TABLE_SIZE(WINDOW_G)]; /* odd multiples of 2^128*generator */
+} secp256k1_ecmult_consts_t;
+
+static const secp256k1_ecmult_consts_t *secp256k1_ecmult_consts = NULL; /* set by secp256k1_ecmult_start, reset to NULL by secp256k1_ecmult_stop */
+
+static void secp256k1_ecmult_start(void) {
+    if (secp256k1_ecmult_consts != NULL)
+        return; /* tables already built; calling again is a no-op */
+
+    /* Allocate the precomputation table; there is no error channel, so exhaustion is treated as fatal. */
+    secp256k1_ecmult_consts_t *ret = (secp256k1_ecmult_consts_t*)malloc(sizeof(secp256k1_ecmult_consts_t));
+    if (ret == NULL) abort();
+    /* get the generator */
+    const secp256k1_ge_t *g = &secp256k1_ge_consts->g; /* secp256k1_ge_consts is dereferenced here, so it must be set up first */
+    secp256k1_gej_t gj; secp256k1_gej_set_ge(&gj, g);
+
+    /* calculate 2^128*generator */
+    secp256k1_gej_t g_128j = gj;
+    for (int i=0; i<128; i++)
+        secp256k1_gej_double_var(&g_128j, &g_128j);
+
+    /* precompute the tables with odd multiples */
+    secp256k1_ecmult_table_precomp_ge_var(ret->pre_g, &gj, WINDOW_G);
+    secp256k1_ecmult_table_precomp_ge_var(ret->pre_g_128, &g_128j, WINDOW_G);
+
+    /* Set the global pointer to the precomputation table. */
+    secp256k1_ecmult_consts = ret;
+}
+
+static void secp256k1_ecmult_stop(void) {
+    /* Detach the tables from the global pointer first, then release them; a no-op when nothing is loaded. */
+    secp256k1_ecmult_consts_t *tables = (secp256k1_ecmult_consts_t*)secp256k1_ecmult_consts;
+    if (tables != NULL) {
+        secp256k1_ecmult_consts = NULL;
+        free(tables);
+    }
+}
+
+/** Convert a number to WNAF notation. The number becomes represented by sum(2^i * wnaf[i], i=0..bits),
+ *  with the following guarantees:
+ *  - each wnaf[i] is either 0, or an odd integer between -((1<<(w-1)) - 1) and ((1<<(w-1)) - 1)
+ *  - two non-zero entries in wnaf are separated by at least w-1 zeroes.
+ *  - the index of the highest non-zero entry in wnaf (=return value-1) is at most bits, where
+ *    bits is the number of bits necessary to represent the absolute value of the input.
+ *  Returns the number of wnaf entries written (0 for a zero input); wnaf needs room for bits+1 ints. */
+static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_num_t *a, int w) {
+    int ret = 0; /* number of entries written so far */
+    int zeroes = 0; /* zero digits owed before the next non-zero digit */
+    secp256k1_num_t x;
+    secp256k1_num_copy(&x, a); /* work on a copy; a itself is left untouched */
+    int sign = 1;
+    if (secp256k1_num_is_neg(&x)) {
+        sign = -1; /* digits are produced for |a| and multiplied by sign */
+        secp256k1_num_negate(&x);
+    }
+    while (!secp256k1_num_is_zero(&x)) {
+        while (!secp256k1_num_is_odd(&x)) {
+            zeroes++;
+            secp256k1_num_shift(&x, 1);
+        }
+        int word = secp256k1_num_shift(&x, w); /* presumably the w low bits shifted out of x - confirm in num.h */
+        while (zeroes) {
+            wnaf[ret++] = 0;
+            zeroes--;
+        }
+        if (word & (1 << (w-1))) {
+            secp256k1_num_inc(&x); /* top bit of the window set: carry 1 into x ... */
+            wnaf[ret++] = sign * (word - (1 << w)); /* ... and emit the negative digit word - 2^w */
+        } else {
+            wnaf[ret++] = sign * word;
+        }
+        zeroes = w-1; /* the w-1 positions above this digit are zero */
+    }
+    return ret;
+}
+
+static void secp256k1_ecmult(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_num_t *na, const secp256k1_num_t *ng) {
+    const secp256k1_ecmult_consts_t *c = secp256k1_ecmult_consts;
+    /* Computes r = na*a + ng*G, G being the generator whose odd multiples are stored in c->pre_g. */
+#ifdef USE_ENDOMORPHISM
+    secp256k1_num_t na_1, na_lam;
+    /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
+    secp256k1_gej_split_exp_var(&na_1, &na_lam, na);
+
+    /* build wnaf representation for na_1 and na_lam. */
+    int wnaf_na_1[129];   int bits_na_1   = secp256k1_ecmult_wnaf(wnaf_na_1,   &na_1,   WINDOW_A);
+    int wnaf_na_lam[129]; int bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, &na_lam, WINDOW_A);
+    int bits = bits_na_1;
+    if (bits_na_lam > bits) bits = bits_na_lam;
+#else
+    /* build wnaf representation for na. */
+    int wnaf_na[257];     int bits_na     = secp256k1_ecmult_wnaf(wnaf_na,     na,      WINDOW_A);
+    int bits = bits_na;
+#endif
+
+    /* calculate odd multiples of a */
+    secp256k1_gej_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_ecmult_table_precomp_gej_var(pre_a, a, WINDOW_A);
+
+#ifdef USE_ENDOMORPHISM
+    secp256k1_gej_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
+    for (int i=0; i<ECMULT_TABLE_SIZE(WINDOW_A); i++)
+        secp256k1_gej_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+#endif
+
+    /* Split G factors. */
+    secp256k1_num_t ng_1, ng_128;
+
+    /* split ng into ng_1 and ng_128 (where ng = ng_1 + ng_128*2^128, and ng_1 and ng_128 are ~128 bit) */
+    secp256k1_num_split(&ng_1, &ng_128, ng, 128);
+
+    /* Build wnaf representation for ng_1 and ng_128 */
+    int wnaf_ng_1[129];   int bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   &ng_1,   WINDOW_G);
+    int wnaf_ng_128[129]; int bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, &ng_128, WINDOW_G);
+    if (bits_ng_1 > bits) bits = bits_ng_1; /* bits ends up as the longest of all wnaf lengths */
+    if (bits_ng_128 > bits) bits = bits_ng_128;
+
+    secp256k1_gej_set_infinity(r);
+    secp256k1_gej_t tmpj;
+    secp256k1_ge_t tmpa;
+
+    for (int i=bits-1; i>=0; i--) {
+        secp256k1_gej_double_var(r, r); /* one doubling per digit position, shared by all wnaf streams */
+        int n;
+#ifdef USE_ENDOMORPHISM
+        if (i < bits_na_1 && (n = wnaf_na_1[i])) {
+            ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
+            secp256k1_gej_add_var(r, r, &tmpj);
+        }
+        if (i < bits_na_lam && (n = wnaf_na_lam[i])) {
+            ECMULT_TABLE_GET_GEJ(&tmpj, pre_a_lam, n, WINDOW_A);
+            secp256k1_gej_add_var(r, r, &tmpj);
+        }
+#else
+        if (i < bits_na && (n = wnaf_na[i])) {
+            ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
+            secp256k1_gej_add_var(r, r, &tmpj);
+        }
+#endif
+        if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
+            ECMULT_TABLE_GET_GE(&tmpa, c->pre_g, n, WINDOW_G);
+            secp256k1_gej_add_ge_var(r, r, &tmpa);
+        }
+        if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
+            ECMULT_TABLE_GET_GE(&tmpa, c->pre_g_128, n, WINDOW_G);
+            secp256k1_gej_add_ge_var(r, r, &tmpa);
+        }
+    }
+}
+
+#endif
diff --git a/src/field.h b/src/field.h
new file mode 100644
index 0000000000..c7feead900
--- /dev/null
+++ b/src/field.h
@@ -0,0 +1,114 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_
+#define _SECP256K1_FIELD_
+
+/** Field element module.
+ *
+ *  Field elements can be represented in several ways, but code accessing
+ *  them (and implementations) need to take certain properties into account:
+ *  - Each field element can be normalized or not.
+ *  - Each field element has a magnitude, which represents how far
+ *    its representation is from normalization. Normalized elements
+ *    always have a magnitude of 1, but a magnitude of 1 doesn't imply
+ *    normality.
+ */
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#if defined(USE_FIELD_GMP)
+#include "field_gmp.h"
+#elif defined(USE_FIELD_10X26)
+#include "field_10x26.h"
+#elif defined(USE_FIELD_5X52)
+#include "field_5x52.h"
+#else
+#error "Please select field implementation"
+#endif
+
+typedef struct {
+    secp256k1_num_t p; /* NOTE(review): presumably the field modulus; nothing in this header sets it - confirm in the implementation */
+} secp256k1_fe_consts_t;
+
+static const secp256k1_fe_consts_t *secp256k1_fe_consts = NULL; /* NULL until loaded; see secp256k1_fe_start / secp256k1_fe_stop below */
+
+/** Initialize field element precomputation data. */
+static void secp256k1_fe_start(void);
+
+/** Unload field element precomputation data. */
+static void secp256k1_fe_stop(void);
+
+/** Normalize a field element. */
+static void secp256k1_fe_normalize(secp256k1_fe_t *r);
+
+/** Set a field element equal to a small integer. Resulting field element is normalized. */
+static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a);
+
+/** Verify whether a field element is zero. Requires the input to be normalized. */
+static int secp256k1_fe_is_zero(const secp256k1_fe_t *a);
+
+/** Check the "oddness" of a field element. Requires the input to be normalized. */
+static int secp256k1_fe_is_odd(const secp256k1_fe_t *a);
+
+/** Compare two field elements. Requires both inputs to be normalized */
+static int secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b);
+
+/** Set a field element equal to 32-byte big endian value. Resulting field element is normalized. */
+static void secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a);
+
+/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
+static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a);
+
+/** Set a field element equal to the additive inverse of another. Takes a maximum magnitude of the input
+ *  as an argument. The magnitude of the output is one higher. */
+static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m);
+
+/** Multiplies the passed field element with a small integer constant. Multiplies the magnitude by that
+ *  small integer. */
+static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a);
+
+/** Adds a field element to another. The result has the sum of the inputs' magnitudes as magnitude. */
+static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+
+/** Sets a field element to be the product of two others. Requires the inputs' magnitudes to be at most 8.
+ *  The output magnitude is 1 (but not guaranteed to be normalized). */
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b);
+
+/** Sets a field element to be the square of another. Requires the input's magnitude to be at most 8.
+ *  The output magnitude is 1 (but not guaranteed to be normalized). */
+static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+
+/** Sets a field element to be the (modular) square root (if any exist) of another. Requires the
+ *  input's magnitude to be at most 8. The output magnitude is 1 (but not guaranteed to be
+ *  normalized). Return value indicates whether a square root was found. */
+static int secp256k1_fe_sqrt(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+
+/** Sets a field element to be the (modular) inverse of another. Requires the input's magnitude to be
+ *  at most 8. The output magnitude is 1 (but not guaranteed to be normalized). */
+static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+
+/** Potentially faster version of secp256k1_fe_inv, without constant-time guarantee. */
+static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a);
+
+/** Calculate the (modular) inverses of a batch of field elements. Requires the inputs' magnitudes to be
+ *  at most 8. The output magnitudes are 1 (but not guaranteed to be normalized). The inputs and
+ *  outputs must not overlap in memory. */
+static void secp256k1_fe_inv_all(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]);
+
+/** Potentially faster version of secp256k1_fe_inv_all, without constant-time guarantee. */
+static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]);
+
+
+/** Convert a field element to a hexadecimal string. */
+static void secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a);
+
+/** Convert a hexadecimal string to a field element. */
+static void secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen);
+
+#endif
diff --git a/src/field_10x26.h b/src/field_10x26.h
new file mode 100644
index 0000000000..66fb3f2563
--- /dev/null
+++ b/src/field_10x26.h
@@ -0,0 +1,21 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_REPR_
+#define _SECP256K1_FIELD_REPR_
+
+#include <stdint.h>
+
+typedef struct {
+    /* X = sum(i=0..9, n[i]*2^(26*i)) modulo the field characteristic */
+    uint32_t n[10];
+#ifdef VERIFY
+    int magnitude; /* VERIFY builds only: bound used by secp256k1_fe_verify when range-checking the limbs */
+    int normalized; /* VERIFY builds only: non-zero when the limbs are claimed to be fully reduced */
+#endif
+} secp256k1_fe_t;
+
+#endif
diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
new file mode 100644
index 0000000000..c0f1be0b2d
--- /dev/null
+++ b/src/field_10x26_impl.h
@@ -0,0 +1,884 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
+#define _SECP256K1_FIELD_REPR_IMPL_H_
+
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "num.h"
+#include "field.h"
+
+static void secp256k1_fe_inner_start(void) {} /* intentionally empty: nothing is set up here for this representation */
+static void secp256k1_fe_inner_stop(void) {} /* intentionally empty: nothing to release for this representation */
+
+#ifdef VERIFY
+static void secp256k1_fe_verify(const secp256k1_fe_t *a) {
+    const uint32_t *d = a->n;
+    int m = a->normalized ? 1 : 2 * a->magnitude, r = 1; /* m scales the per-limb bound; r stays 1 while every check holds */
+    r &= (d[0] <= 0x3FFFFFFUL * m); /* limbs 0..8: at most m * (2^26 - 1) */
+    r &= (d[1] <= 0x3FFFFFFUL * m);
+    r &= (d[2] <= 0x3FFFFFFUL * m);
+    r &= (d[3] <= 0x3FFFFFFUL * m);
+    r &= (d[4] <= 0x3FFFFFFUL * m);
+    r &= (d[5] <= 0x3FFFFFFUL * m);
+    r &= (d[6] <= 0x3FFFFFFUL * m);
+    r &= (d[7] <= 0x3FFFFFFUL * m);
+    r &= (d[8] <= 0x3FFFFFFUL * m);
+    r &= (d[9] <= 0x03FFFFFUL * m); /* top limb: at most m * (2^22 - 1) */
+    r &= (a->magnitude >= 0);
+    if (a->normalized) {
+        r &= (a->magnitude <= 1);
+        if (r && (d[9] == 0x03FFFFFUL)) { /* top limb saturated: the value may have reached the field characteristic */
+            uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
+            if (mid == 0x3FFFFFFUL) { /* limbs 2..8 all ones too, so limbs 0 and 1 decide */
+                r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL); /* same test secp256k1_fe_normalize uses for its final reduction */
+            }
+        }
+    }
+    VERIFY_CHECK(r == 1);
+}
+#else
+static void secp256k1_fe_verify(const secp256k1_fe_t *a) {
+    (void)a; /* no checks outside VERIFY builds; the cast only silences the unused-parameter warning */
+}
+#endif
+
+static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
+    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
+             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
+
+    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
+    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
+    uint32_t m; /* AND of limbs 2..8 after the first pass; all ones means they are saturated */
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x3D1UL; t1 += (x << 6); /* folds x*2^256 back in as x*(2^32 + 0x3D1) */
+    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
+    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
+    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
+    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
+    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
+    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
+    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
+    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
+    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
+
+    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t9 >> 23 == 0);
+
+    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
+    x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
+        & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
+
+    /* Apply the final reduction (for constant-time behaviour, we do it always) */
+    t0 += x * 0x3D1UL; t1 += (x << 6); /* x is 0 or 1 here, so this adds either nothing or 2^32 + 0x3D1 */
+    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
+    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
+    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
+    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
+    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
+    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
+    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
+    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
+    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
+
+    /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
+    VERIFY_CHECK(t9 >> 22 == x);
+
+    /* Mask off the possible multiple of 2^256 from the final reduction */
+    t9 &= 0x03FFFFFUL;
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
+
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
+    for (int limb = 1; limb < 10; limb++) r->n[limb] = 0;
+    r->n[0] = a; /* the value occupies the lowest limb only */
+#ifdef VERIFY
+    r->normalized = 1;
+    r->magnitude = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized); /* the limb test below is only checked against normalized inputs */
+    secp256k1_fe_verify(a);
+#endif
+    const uint32_t *t = a->n;
+    return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0; /* OR of all limbs is zero only when every limb is zero */
+}
+
+SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized); /* the low bit is only checked against normalized inputs */
+    secp256k1_fe_verify(a);
+#endif
+    return a->n[0] & 1; /* low bit of the lowest limb */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) {
+    /* Zero every limb; VERIFY builds also record magnitude 0 and mark the element normalized. */
+    for (int limb = 0; limb < 10; ++limb)
+        a->n[limb] = 0;
+#ifdef VERIFY
+    a->normalized = 1;
+    a->magnitude = 0;
+#endif
+}
+
+SECP256K1_INLINE static int secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized); /* limb-wise comparison is only checked against normalized inputs */
+    VERIFY_CHECK(b->normalized);
+    secp256k1_fe_verify(a);
+    secp256k1_fe_verify(b);
+#endif
+    const uint32_t *t = a->n, *u = b->n;
+    return ((t[0]^u[0]) | (t[1]^u[1]) | (t[2]^u[2]) | (t[3]^u[3]) | (t[4]^u[4]) /* XOR exposes differing bits per limb ... */
+          | (t[5]^u[5]) | (t[6]^u[6]) | (t[7]^u[7]) | (t[8]^u[8]) | (t[9]^u[9])) == 0; /* ... and the OR is zero only if all ten limbs match */
+}
+
+static void secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
+    r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; /* limbs are built up with |= below, so start from zero */
+    r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
+    for (int i=0; i<32; i++) { /* i counts bytes from the least significant one, a[31] */
+        for (int j=0; j<4; j++) { /* two bits at a time: bit offsets are even, so a pair never straddles a 26-bit limb */
+            int limb = (8*i+2*j)/26;
+            int shift = (8*i+2*j)%26;
+            r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
+        }
+    }
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1; /* NOTE(review): no reduction happens above, so inputs >= the field characteristic would fail secp256k1_fe_verify */
+    secp256k1_fe_verify(r);
+#endif
+}
+
+/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
+static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    secp256k1_fe_verify(a);
+#endif
+    for (int i=0; i<32; i++) { /* i counts output bytes from the least significant one, r[31] */
+        int c = 0; /* byte being assembled */
+        for (int j=0; j<4; j++) { /* two bits at a time: bit offsets are even, so a pair never straddles a 26-bit limb */
+            int limb = (8*i+2*j)/26;
+            int shift = (8*i+2*j)%26;
+            c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
+        }
+        r[31-i] = c;
+    }
+}
+
+SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= m); /* m is the caller's upper bound on the magnitude of a */
+    secp256k1_fe_verify(a);
+#endif
+    r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0]; /* 0x3FFFC2F = 2^26 - 0x3D1 and 0x3FFFFBF = 2^26 - 1 - 0x40: with the all-ones limbs ... */
+    r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1]; /* ... below, these are the limbs of 2^256 - 2^32 - 0x3D1, so r = 2*(m+1)*p - a limb by limb */
+    r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
+    r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
+    r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
+    r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
+    r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
+    r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
+    r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
+    r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
+#ifdef VERIFY
+    r->magnitude = m + 1;
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) {
+    r->n[0] *= a; /* every limb is scaled in place; no carries are propagated here */
+    r->n[1] *= a;
+    r->n[2] *= a;
+    r->n[3] *= a;
+    r->n[4] *= a;
+    r->n[5] *= a;
+    r->n[6] *= a;
+    r->n[7] *= a;
+    r->n[8] *= a;
+    r->n[9] *= a;
+#ifdef VERIFY
+    r->magnitude *= a; /* the tracked bound scales by the same factor */
+    r->normalized = 0;
+    secp256k1_fe_verify(r); /* only the result is range-checked; the input is not verified on entry */
+#endif
+}
+
+SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+#ifdef VERIFY
+    secp256k1_fe_verify(a); /* only a is checked on entry; r is checked after the addition */
+#endif
+    r->n[0] += a->n[0]; /* limb-wise r += a; no carries are propagated here */
+    r->n[1] += a->n[1];
+    r->n[2] += a->n[2];
+    r->n[3] += a->n[3];
+    r->n[4] += a->n[4];
+    r->n[5] += a->n[5];
+    r->n[6] += a->n[6];
+    r->n[7] += a->n[7];
+    r->n[8] += a->n[8];
+    r->n[9] += a->n[9];
+#ifdef VERIFY
+    r->magnitude += a->magnitude; /* the tracked bounds add up as well */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+#ifdef VERIFY
+#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) /* asserts that x has no bits set at position n or above */
+#else
+#define VERIFY_BITS(x, n) do { } while(0) /* expands to an empty statement outside VERIFY builds */
+#endif
+
+SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t *b, uint32_t *r) {
+    VERIFY_BITS(a[0], 30);
+    VERIFY_BITS(a[1], 30);
+    VERIFY_BITS(a[2], 30);
+    VERIFY_BITS(a[3], 30);
+    VERIFY_BITS(a[4], 30);
+    VERIFY_BITS(a[5], 30);
+    VERIFY_BITS(a[6], 30);
+    VERIFY_BITS(a[7], 30);
+    VERIFY_BITS(a[8], 30);
+    VERIFY_BITS(a[9], 26);
+    VERIFY_BITS(b[0], 30);
+    VERIFY_BITS(b[1], 30);
+    VERIFY_BITS(b[2], 30);
+    VERIFY_BITS(b[3], 30);
+    VERIFY_BITS(b[4], 30);
+    VERIFY_BITS(b[5], 30);
+    VERIFY_BITS(b[6], 30);
+    VERIFY_BITS(b[7], 30);
+    VERIFY_BITS(b[8], 30);
+    VERIFY_BITS(b[9], 26);
+
+    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
+    /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
+     *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
+     *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
+     */
+
+    uint64_t c, d;
+
+    d  = (uint64_t)a[0] * b[9]
+       + (uint64_t)a[1] * b[8]
+       + (uint64_t)a[2] * b[7]
+       + (uint64_t)a[3] * b[6]
+       + (uint64_t)a[4] * b[5]
+       + (uint64_t)a[5] * b[4]
+       + (uint64_t)a[6] * b[3]
+       + (uint64_t)a[7] * b[2]
+       + (uint64_t)a[8] * b[1]
+       + (uint64_t)a[9] * b[0];
+    /* VERIFY_BITS(d, 64); */
+    /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
+    uint32_t t9 = d & M; d >>= 26;
+    VERIFY_BITS(t9, 26);
+    VERIFY_BITS(d, 38);
+    /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
+
+    c  = (uint64_t)a[0] * b[0];
+    VERIFY_BITS(c, 60);
+    /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
+    d += (uint64_t)a[1] * b[9]
+       + (uint64_t)a[2] * b[8]
+       + (uint64_t)a[3] * b[7]
+       + (uint64_t)a[4] * b[6]
+       + (uint64_t)a[5] * b[5]
+       + (uint64_t)a[6] * b[4]
+       + (uint64_t)a[7] * b[3]
+       + (uint64_t)a[8] * b[2]
+       + (uint64_t)a[9] * b[1];
+    VERIFY_BITS(d, 63);
+    /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    uint64_t u0 = d & M; d >>= 26; c += u0 * R0;
+    VERIFY_BITS(u0, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 61);
+    /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    uint32_t t0 = c & M; c >>= 26; c += u0 * R1;
+    VERIFY_BITS(t0, 26);
+    VERIFY_BITS(c, 37);
+    /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+
+    c += (uint64_t)a[0] * b[1]
+       + (uint64_t)a[1] * b[0];
+    VERIFY_BITS(c, 62);
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    d += (uint64_t)a[2] * b[9]
+       + (uint64_t)a[3] * b[8]
+       + (uint64_t)a[4] * b[7]
+       + (uint64_t)a[5] * b[6]
+       + (uint64_t)a[6] * b[5]
+       + (uint64_t)a[7] * b[4]
+       + (uint64_t)a[8] * b[3]
+       + (uint64_t)a[9] * b[2];
+    VERIFY_BITS(d, 63);
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    uint64_t u1 = d & M; d >>= 26; c += u1 * R0;
+    VERIFY_BITS(u1, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 63);
+    /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    uint32_t t1 = c & M; c >>= 26; c += u1 * R1;
+    VERIFY_BITS(t1, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+
+    c += (uint64_t)a[0] * b[2]
+       + (uint64_t)a[1] * b[1]
+       + (uint64_t)a[2] * b[0];
+    VERIFY_BITS(c, 62);
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    d += (uint64_t)a[3] * b[9]
+       + (uint64_t)a[4] * b[8]
+       + (uint64_t)a[5] * b[7]
+       + (uint64_t)a[6] * b[6]
+       + (uint64_t)a[7] * b[5]
+       + (uint64_t)a[8] * b[4]
+       + (uint64_t)a[9] * b[3];
+    VERIFY_BITS(d, 63);
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    uint64_t u2 = d & M; d >>= 26; c += u2 * R0;
+    VERIFY_BITS(u2, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 63);
+    /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    uint32_t t2 = c & M; c >>= 26; c += u2 * R1;
+    VERIFY_BITS(t2, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[3]
+       + (uint64_t)a[1] * b[2]
+       + (uint64_t)a[2] * b[1]
+       + (uint64_t)a[3] * b[0];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    d += (uint64_t)a[4] * b[9]
+       + (uint64_t)a[5] * b[8]
+       + (uint64_t)a[6] * b[7]
+       + (uint64_t)a[7] * b[6]
+       + (uint64_t)a[8] * b[5]
+       + (uint64_t)a[9] * b[4];
+    VERIFY_BITS(d, 63);
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    uint64_t u3 = d & M; d >>= 26; c += u3 * R0;
+    VERIFY_BITS(u3, 26);
+    VERIFY_BITS(d, 37);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    uint32_t t3 = c & M; c >>= 26; c += u3 * R1;
+    VERIFY_BITS(t3, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[4]
+       + (uint64_t)a[1] * b[3]
+       + (uint64_t)a[2] * b[2]
+       + (uint64_t)a[3] * b[1]
+       + (uint64_t)a[4] * b[0];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[5] * b[9]
+       + (uint64_t)a[6] * b[8]
+       + (uint64_t)a[7] * b[7]
+       + (uint64_t)a[8] * b[6]
+       + (uint64_t)a[9] * b[5];
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    uint64_t u4 = d & M; d >>= 26; c += u4 * R0;
+    VERIFY_BITS(u4, 26);
+    VERIFY_BITS(d, 36);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    uint32_t t4 = c & M; c >>= 26; c += u4 * R1;
+    VERIFY_BITS(t4, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[5]
+       + (uint64_t)a[1] * b[4]
+       + (uint64_t)a[2] * b[3]
+       + (uint64_t)a[3] * b[2]
+       + (uint64_t)a[4] * b[1]
+       + (uint64_t)a[5] * b[0];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[6] * b[9]
+       + (uint64_t)a[7] * b[8]
+       + (uint64_t)a[8] * b[7]
+       + (uint64_t)a[9] * b[6];
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    uint64_t u5 = d & M; d >>= 26; c += u5 * R0;
+    VERIFY_BITS(u5, 26);
+    VERIFY_BITS(d, 36);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    uint32_t t5 = c & M; c >>= 26; c += u5 * R1;
+    VERIFY_BITS(t5, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[6]
+       + (uint64_t)a[1] * b[5]
+       + (uint64_t)a[2] * b[4]
+       + (uint64_t)a[3] * b[3]
+       + (uint64_t)a[4] * b[2]
+       + (uint64_t)a[5] * b[1]
+       + (uint64_t)a[6] * b[0];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[7] * b[9]
+       + (uint64_t)a[8] * b[8]
+       + (uint64_t)a[9] * b[7];
+    VERIFY_BITS(d, 61);
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    uint64_t u6 = d & M; d >>= 26; c += u6 * R0;
+    VERIFY_BITS(u6, 26);
+    VERIFY_BITS(d, 35);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    uint32_t t6 = c & M; c >>= 26; c += u6 * R1;
+    VERIFY_BITS(t6, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[7]
+       + (uint64_t)a[1] * b[6]
+       + (uint64_t)a[2] * b[5]
+       + (uint64_t)a[3] * b[4]
+       + (uint64_t)a[4] * b[3]
+       + (uint64_t)a[5] * b[2]
+       + (uint64_t)a[6] * b[1]
+       + (uint64_t)a[7] * b[0];
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x8000007C00000007ULL);
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[8] * b[9]
+       + (uint64_t)a[9] * b[8];
+    VERIFY_BITS(d, 58);
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    uint64_t u7 = d & M; d >>= 26; c += u7 * R0;
+    VERIFY_BITS(u7, 26);
+    VERIFY_BITS(d, 32);
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
+    /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    uint32_t t7 = c & M; c >>= 26; c += u7 * R1;
+    VERIFY_BITS(t7, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)a[0] * b[8]
+       + (uint64_t)a[1] * b[7]
+       + (uint64_t)a[2] * b[6]
+       + (uint64_t)a[3] * b[5]
+       + (uint64_t)a[4] * b[4]
+       + (uint64_t)a[5] * b[3]
+       + (uint64_t)a[6] * b[2]
+       + (uint64_t)a[7] * b[1]
+       + (uint64_t)a[8] * b[0];
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x9000007B80000008ULL);
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[9] * b[9];
+    VERIFY_BITS(d, 57);
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    uint64_t u8 = d & M; d >>= 26; c += u8 * R0;
+    VERIFY_BITS(u8, 26);
+    VERIFY_BITS(d, 31);
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[3] = t3;
+    VERIFY_BITS(r[3], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = t4;
+    VERIFY_BITS(r[4], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[5] = t5;
+    VERIFY_BITS(r[5], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[6] = t6;
+    VERIFY_BITS(r[6], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[7] = t7;
+    VERIFY_BITS(r[7], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[8] = c & M; c >>= 26; c += u8 * R1;
+    VERIFY_BITS(r[8], 26);
+    VERIFY_BITS(c, 39);
+    /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += d * R0 + t9;
+    VERIFY_BITS(c, 45);
+    /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
+    VERIFY_BITS(r[9], 22);
+    VERIFY_BITS(c, 46);
+    /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    d    = c * (R0 >> 4) + t0;
+    VERIFY_BITS(d, 56);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[0] = d & M; d >>= 26;
+    VERIFY_BITS(r[0], 26);
+    VERIFY_BITS(d, 30);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d   += c * (R1 >> 4) + t1;
+    VERIFY_BITS(d, 53);
+    VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[1] = d & M; d >>= 26;
+    VERIFY_BITS(r[1], 26);
+    VERIFY_BITS(d, 27);
+    VERIFY_CHECK(d <= 0x4000000ULL);
+    /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d   += t2;
+    VERIFY_BITS(d, 27);
+    /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[2] = d;
+    VERIFY_BITS(r[2], 27);
+    /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+
+SECP256K1_INLINE static void secp256k1_fe_sqr_inner(const uint32_t *a, uint32_t *r) { /* r = a*a reduced; a and r are 10 limbs, a[0]/r[0] least significant */
+    VERIFY_BITS(a[0], 30);
+    VERIFY_BITS(a[1], 30);
+    VERIFY_BITS(a[2], 30);
+    VERIFY_BITS(a[3], 30);
+    VERIFY_BITS(a[4], 30);
+    VERIFY_BITS(a[5], 30);
+    VERIFY_BITS(a[6], 30);
+    VERIFY_BITS(a[7], 30);
+    VERIFY_BITS(a[8], 30);
+    VERIFY_BITS(a[9], 26);
+
+    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; /* M: 26-bit limb mask; R0/R1: fold-down constants, see the [x 0 ... 0] note below */
+    /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
+     *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
+     *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
+     */
+
+    uint64_t c, d; /* c accumulates the low columns p0..p8, d the high columns p9..p18 that get folded into c */
+
+    d  = (uint64_t)(a[0]*2) * a[9]
+       + (uint64_t)(a[1]*2) * a[8]
+       + (uint64_t)(a[2]*2) * a[7]
+       + (uint64_t)(a[3]*2) * a[6]
+       + (uint64_t)(a[4]*2) * a[5];
+    /* VERIFY_BITS(d, 64); */
+    /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
+    uint32_t t9 = d & M; d >>= 26; /* t9 is set aside here and only added back when r[9] is formed */
+    VERIFY_BITS(t9, 26);
+    VERIFY_BITS(d, 38);
+    /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
+
+    c  = (uint64_t)a[0] * a[0];
+    VERIFY_BITS(c, 60);
+    /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
+    d += (uint64_t)(a[1]*2) * a[9]
+       + (uint64_t)(a[2]*2) * a[8]
+       + (uint64_t)(a[3]*2) * a[7]
+       + (uint64_t)(a[4]*2) * a[6]
+       + (uint64_t)a[5] * a[5];
+    VERIFY_BITS(d, 63);
+    /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    uint64_t u0 = d & M; d >>= 26; c += u0 * R0;
+    VERIFY_BITS(u0, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 61);
+    /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    uint32_t t0 = c & M; c >>= 26; c += u0 * R1;
+    VERIFY_BITS(t0, 26);
+    VERIFY_BITS(c, 37);
+    /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[1];
+    VERIFY_BITS(c, 62);
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    d += (uint64_t)(a[2]*2) * a[9]
+       + (uint64_t)(a[3]*2) * a[8]
+       + (uint64_t)(a[4]*2) * a[7]
+       + (uint64_t)(a[5]*2) * a[6];
+    VERIFY_BITS(d, 63);
+    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    uint64_t u1 = d & M; d >>= 26; c += u1 * R0;
+    VERIFY_BITS(u1, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 63);
+    /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    uint32_t t1 = c & M; c >>= 26; c += u1 * R1;
+    VERIFY_BITS(t1, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[2]
+       + (uint64_t)a[1] * a[1];
+    VERIFY_BITS(c, 62);
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    d += (uint64_t)(a[3]*2) * a[9]
+       + (uint64_t)(a[4]*2) * a[8]
+       + (uint64_t)(a[5]*2) * a[7]
+       + (uint64_t)a[6] * a[6];
+    VERIFY_BITS(d, 63);
+    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    uint64_t u2 = d & M; d >>= 26; c += u2 * R0;
+    VERIFY_BITS(u2, 26);
+    VERIFY_BITS(d, 37);
+    VERIFY_BITS(c, 63);
+    /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    uint32_t t2 = c & M; c >>= 26; c += u2 * R1;
+    VERIFY_BITS(t2, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[3]
+       + (uint64_t)(a[1]*2) * a[2];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    d += (uint64_t)(a[4]*2) * a[9]
+       + (uint64_t)(a[5]*2) * a[8]
+       + (uint64_t)(a[6]*2) * a[7];
+    VERIFY_BITS(d, 63);
+    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    uint64_t u3 = d & M; d >>= 26; c += u3 * R0;
+    VERIFY_BITS(u3, 26);
+    VERIFY_BITS(d, 37);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    uint32_t t3 = c & M; c >>= 26; c += u3 * R1;
+    VERIFY_BITS(t3, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[4]
+       + (uint64_t)(a[1]*2) * a[3]
+       + (uint64_t)a[2] * a[2];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    d += (uint64_t)(a[5]*2) * a[9]
+       + (uint64_t)(a[6]*2) * a[8]
+       + (uint64_t)a[7] * a[7];
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    uint64_t u4 = d & M; d >>= 26; c += u4 * R0;
+    VERIFY_BITS(u4, 26);
+    VERIFY_BITS(d, 36);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    uint32_t t4 = c & M; c >>= 26; c += u4 * R1;
+    VERIFY_BITS(t4, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[5]
+       + (uint64_t)(a[1]*2) * a[4]
+       + (uint64_t)(a[2]*2) * a[3];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)(a[6]*2) * a[9]
+       + (uint64_t)(a[7]*2) * a[8];
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    uint64_t u5 = d & M; d >>= 26; c += u5 * R0;
+    VERIFY_BITS(u5, 26);
+    VERIFY_BITS(d, 36);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    uint32_t t5 = c & M; c >>= 26; c += u5 * R1;
+    VERIFY_BITS(t5, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[6]
+       + (uint64_t)(a[1]*2) * a[5]
+       + (uint64_t)(a[2]*2) * a[4]
+       + (uint64_t)a[3] * a[3];
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)(a[7]*2) * a[9]
+       + (uint64_t)a[8] * a[8];
+    VERIFY_BITS(d, 61);
+    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    uint64_t u6 = d & M; d >>= 26; c += u6 * R0;
+    VERIFY_BITS(u6, 26);
+    VERIFY_BITS(d, 35);
+    /* VERIFY_BITS(c, 64); */
+    /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    uint32_t t6 = c & M; c >>= 26; c += u6 * R1;
+    VERIFY_BITS(t6, 26);
+    VERIFY_BITS(c, 39);
+    /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[7]
+       + (uint64_t)(a[1]*2) * a[6]
+       + (uint64_t)(a[2]*2) * a[5]
+       + (uint64_t)(a[3]*2) * a[4];
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x8000007C00000007ULL);
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)(a[8]*2) * a[9];
+    VERIFY_BITS(d, 58);
+    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    uint64_t u7 = d & M; d >>= 26; c += u7 * R0;
+    VERIFY_BITS(u7, 26);
+    VERIFY_BITS(d, 32);
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
+    /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    uint32_t t7 = c & M; c >>= 26; c += u7 * R1;
+    VERIFY_BITS(t7, 26);
+    VERIFY_BITS(c, 38);
+    /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    c += (uint64_t)(a[0]*2) * a[8]
+       + (uint64_t)(a[1]*2) * a[7]
+       + (uint64_t)(a[2]*2) * a[6]
+       + (uint64_t)(a[3]*2) * a[5]
+       + (uint64_t)a[4] * a[4];
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x9000007B80000008ULL);
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d += (uint64_t)a[9] * a[9];
+    VERIFY_BITS(d, 57);
+    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    uint64_t u8 = d & M; d >>= 26; c += u8 * R0;
+    VERIFY_BITS(u8, 26);
+    VERIFY_BITS(d, 31);
+    /* VERIFY_BITS(c, 64); */
+    VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[3] = t3;
+    VERIFY_BITS(r[3], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = t4;
+    VERIFY_BITS(r[4], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[5] = t5;
+    VERIFY_BITS(r[5], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[6] = t6;
+    VERIFY_BITS(r[6], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[7] = t7;
+    VERIFY_BITS(r[7], 26);
+    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[8] = c & M; c >>= 26; c += u8 * R1;
+    VERIFY_BITS(r[8], 26);
+    VERIFY_BITS(c, 39);
+    /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += d * R0 + t9;
+    VERIFY_BITS(c, 45);
+    /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); /* the top limb keeps only 22 bits, hence the 4-bit adjusted mask, shift and constant */
+    VERIFY_BITS(r[9], 22);
+    VERIFY_BITS(c, 46);
+    /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    d    = c * (R0 >> 4) + t0; /* d is reused from here on as the carry for the final pass over limbs 0..2 */
+    VERIFY_BITS(d, 56);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[0] = d & M; d >>= 26;
+    VERIFY_BITS(r[0], 26);
+    VERIFY_BITS(d, 30);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d   += c * (R1 >> 4) + t1;
+    VERIFY_BITS(d, 53);
+    VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
+    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[1] = d & M; d >>= 26;
+    VERIFY_BITS(r[1], 26);
+    VERIFY_BITS(d, 27);
+    VERIFY_CHECK(d <= 0x4000000ULL);
+    /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    d   += t2;
+    VERIFY_BITS(d, 27);
+    /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[2] = d; /* stored without masking: r[2] may carry a 27th bit, as the check below allows */
+    VERIFY_BITS(r[2], 27);
+    /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+
+
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) { /* r = a * b in the field */
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= 8); /* VERIFY builds only: each input must have magnitude at most 8 */
+    VERIFY_CHECK(b->magnitude <= 8);
+    secp256k1_fe_verify(a);
+    secp256k1_fe_verify(b);
+#endif
+    secp256k1_fe_mul_inner(a->n, b->n, r->n); /* the limb-level multiply; this call is the only work done outside VERIFY builds */
+#ifdef VERIFY
+    r->magnitude = 1; /* bookkeeping: the product is recorded as magnitude 1 and not normalized */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { /* r = a * a in the field */
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= 8); /* VERIFY builds only: the input must have magnitude at most 8 */
+    secp256k1_fe_verify(a);
+#endif
+    secp256k1_fe_sqr_inner(a->n, r->n); /* the limb-level squaring; this call is the only work done outside VERIFY builds */
+#ifdef VERIFY
+    r->magnitude = 1; /* bookkeeping: the square is recorded as magnitude 1 and not normalized */
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+#endif
diff --git a/src/field_5x52.h b/src/field_5x52.h
new file mode 100644
index 0000000000..aeb0a6a1e8
--- /dev/null
+++ b/src/field_5x52.h
@@ -0,0 +1,21 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_REPR_
+#define _SECP256K1_FIELD_REPR_
+
+#include <stdint.h>
+
+typedef struct {
+    /* X = sum(i=0..4, n[i]*2^(52*i)) mod p, p being the field prime: five limbs, n[0] least significant */
+    uint64_t n[5];
+#ifdef VERIFY
+    int magnitude; /* VERIFY builds only: bookkeeping bound on how large the limbs may be */
+    int normalized; /* VERIFY builds only: flag, nonzero when the value is marked as normalized */
+#endif
+} secp256k1_fe_t;
+
+#endif
diff --git a/src/field_5x52_asm.asm b/src/field_5x52_asm.asm
new file mode 100644
index 0000000000..5e785f7630
--- /dev/null
+++ b/src/field_5x52_asm.asm
@@ -0,0 +1,469 @@
+	;; Added by Diederik Huys, March 2013
+	;;
+	;; Provided public procedures:
+	;; 	secp256k1_fe_mul_inner
+	;; 	secp256k1_fe_sqr_inner
+	;;
+	;; Needed tools: YASM (http://yasm.tortall.net)
+	;;
+	;; 
+
+	BITS 64
+
+%ifidn   __OUTPUT_FORMAT__,macho64
+%define SYM(x) _ %+ x
+%else
+%define SYM(x) x
+%endif
+
+	;;  Procedure ExSetMult
+	;;  Register Layout:
+	;;  INPUT: 	rdi	= a->n
+	;; 	   	rsi  	= b->n
+	;; 	   	rdx  	= r->n
+	;; 
+	;;  INTERNAL:	rdx:rax  = multiplication accumulator
+	;; 		r9:r8    = c
+	;; 		r10-r13  = t0-t3
+	;; 		r14	 = b.n[0] / t4
+	;; 		r15	 = b.n[1] / t5
+	;; 		rbx	 = b.n[2] / t6
+	;; 		rcx	 = b.n[3] / t7
+	;; 		rbp	 = Constant 0FFFFFFFFFFFFFh / t8
+	;; 		rsi	 = b.n / b.n[4] / t9
+
+	GLOBAL SYM(secp256k1_fe_mul_inner)
+	ALIGN 32
+SYM(secp256k1_fe_mul_inner):	; in: rdi = a limbs, rsi = b limbs, rdx = result limbs
+	push rbp		; save the registers that are overwritten below
+	push rbx
+	push r12
+	push r13
+	push r14
+	push r15
+	push rdx		; keep the result pointer: every mul overwrites rdx
+	mov r14,[rsi+8*0]	; preload b.n[0]. This will be the case until
+				; b.n[0] is no longer needed, then we reassign
+				; r14 to t4
+	;; c=a.n[0] * b.n[0]
+   	mov rax,[rdi+0*8]	; load a.n[0]
+	mov rbp,0FFFFFFFFFFFFFh	; rbp = 52-bit limb mask
+	mul r14			; rdx:rax=a.n[0]*b.n[0]
+	mov r15,[rsi+1*8]	; preload b.n[1]
+	mov r10,rbp		; load limb mask into target register for t0
+	mov r8,rax
+	and r10,rax		; t0 = low 52 bits of c
+	shrd r8,rdx,52		; c >>= 52
+	xor r9,r9		; c < 2^64, so we ditch the HO part 
+
+	;; c+=a.n[0] * b.n[1] + a.n[1] * b.n[0]
+	mov rax,[rdi+0*8]
+	mul r15			
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+1*8]
+	mul r14			
+	mov r11,rbp		; limb mask for t1
+	mov rbx,[rsi+2*8]	; preload b.n[2]
+	add r8,rax
+	adc r9,rdx
+	and r11,r8		; t1 = low 52 bits of c
+	shrd r8,r9,52
+	xor r9,r9
+	
+	;; c+=a.n[0 1 2] * b.n[2 1 0]
+	mov rax,[rdi+0*8]
+	mul rbx			
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+1*8]
+	mul r15			
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+2*8]
+	mul r14
+	mov r12,rbp		; limb mask for t2
+	mov rcx,[rsi+3*8]	; preload b.n[3]
+	add r8,rax
+	adc r9,rdx
+	and r12,r8		; t2 = low 52 bits of c
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=a.n[0 1 2 3] * b.n[3 2 1 0]
+	mov rax,[rdi+0*8]
+	mul rcx			
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+1*8]
+	mul rbx			
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+2*8]
+	mul r15			
+	add r8,rax
+	adc r9,rdx
+	
+	mov rax,[rdi+3*8]
+	mul r14			
+	mov r13,rbp             ; limb mask for t3
+	mov rsi,[rsi+4*8]	; load b.n[4] and destroy pointer
+	add r8,rax
+	adc r9,rdx
+	and r13,r8		; t3 = low 52 bits of c
+
+	shrd r8,r9,52
+	xor r9,r9		
+
+
+	;; c+=a.n[0 1 2 3 4] * b.n[4 3 2 1 0]
+	mov rax,[rdi+0*8]
+	mul rsi
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+1*8]
+	mul rcx
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+2*8]
+	mul rbx			
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+3*8]
+	mul r15			
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+4*8]
+	mul r14			
+	mov r14,rbp             ; load limb mask into t4 and destroy b.n[0]
+	add r8,rax
+	adc r9,rdx
+	and r14,r8		; t4 = low 52 bits of c
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=a.n[1 2 3 4] * b.n[4 3 2 1]
+	mov rax,[rdi+1*8]
+	mul rsi
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+2*8]
+	mul rcx
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+3*8]
+	mul rbx
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+4*8]
+	mul r15
+	mov r15,rbp		; load limb mask into t5 and destroy b.n[1]
+	add r8,rax
+	adc r9,rdx
+
+	and r15,r8		; t5 = low 52 bits of c
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=a.n[2 3 4] * b.n[4 3 2]
+	mov rax,[rdi+2*8]
+	mul rsi
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+3*8]
+	mul rcx
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+4*8]
+	mul rbx
+	mov rbx,rbp		; load limb mask into t6 and destroy b.n[2]
+	add r8,rax
+	adc r9,rdx
+
+	and rbx,r8		; t6 = low 52 bits of c
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=a.n[3 4] * b.n[4 3]
+	mov rax,[rdi+3*8]
+	mul rsi
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,[rdi+4*8]
+	mul rcx
+	mov rcx,rbp		; load limb mask into t7 and destroy b.n[3]
+	add r8,rax
+	adc r9,rdx
+	and rcx,r8		; t7 = low 52 bits of c
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=a.n[4] * b.n[4]
+	mov rax,[rdi+4*8]
+	mul rsi
+	;; mov rbp,rbp		; limb mask already there!
+	add r8,rax
+	adc r9,rdx
+	and rbp,r8 		; t8 = low 52 bits of c (overwrites the mask)
+	shrd r8,r9,52
+	xor r9,r9		
+
+	mov rsi,r8		; load c into t9 and destroy b.n[4]
+	;; *******************************************************
+common_exit_norm:		; shared tail: folds t5..t9 into t0..t4, stores the five result limbs, returns
+	mov rdi,01000003D10h	; load constant: 01000003D1h << 4, the factor applied to each of t5..t9
+
+	mov rax,r15		; get t5
+	mul rdi
+	add rax,r10    		; +t0
+	adc rdx,0
+	mov r10,0FFFFFFFFFFFFFh ; 52-bit limb mask. Sadly, we ran out of registers!
+	mov r8,rax		; +c
+	and r10,rax		; r10 = provisional limb 0, completed after the final fold
+	shrd r8,rdx,52
+	xor r9,r9
+
+	mov rax,rbx		; get t6
+	mul rdi
+	add rax,r11		; +t1
+	adc rdx,0
+	mov r11,0FFFFFFFFFFFFFh ; 52-bit limb mask
+	add r8,rax		; +c
+	adc r9,rdx
+	and r11,r8		; r11 = provisional limb 1
+	shrd r8,r9,52
+	xor r9,r9
+
+	mov rax,rcx    		; get t7
+	mul rdi
+	add rax,r12		; +t2
+	adc rdx,0
+	pop rbx			; retrieve pointer to this.n (the result limbs, pushed last on entry)
+	mov r12,0FFFFFFFFFFFFFh ; 52-bit limb mask
+	add r8,rax		; +c
+	adc r9,rdx
+	and r12,r8
+	mov [rbx+2*8],r12	; mov into this.n[2]
+	shrd r8,r9,52
+	xor r9,r9
+	
+	mov rax,rbp    		; get t8
+	mul rdi
+	add rax,r13    		; +t3
+	adc rdx,0
+	mov r13,0FFFFFFFFFFFFFh ; 52-bit limb mask
+	add r8,rax		; +c
+	adc r9,rdx
+	and r13,r8
+	mov [rbx+3*8],r13	; -> this.n[3]
+	shrd r8,r9,52
+	xor r9,r9
+	
+	mov rax,rsi    		; get t9
+	mul rdi
+	add rax,r14    		; +t4
+	adc rdx,0
+	mov r14,0FFFFFFFFFFFFh	; !!! 48-bit mask: the top limb is narrower than the others
+	add r8,rax		; +c
+	adc r9,rdx
+	and r14,r8
+	mov [rbx+4*8],r14	; -> this.n[4]
+	shrd r8,r9,48		; !!! shift by 48 to match the 48-bit top limb
+	xor r9,r9
+	
+	mov rax,01000003D1h	; unshifted constant: folds what is left above the top limb into limb 0
+	mul r8		
+	add rax,r10
+	adc rdx,0
+	mov r10,0FFFFFFFFFFFFFh ; 52-bit limb mask
+	mov r8,rax
+	and rax,r10
+	shrd r8,rdx,52
+	mov [rbx+0*8],rax	; -> this.n[0]
+	add r8,r11		; carry out of limb 0 + provisional limb 1
+	mov [rbx+1*8],r8	; -> this.n[1] (stored without masking)
+
+	pop r15			; restore the saved registers in reverse push order
+	pop r14
+	pop r13
+	pop r12
+	pop rbx
+	pop rbp
+	ret
+
+	
+	;;  PROC ExSetSquare
+	;;  Register Layout:
+	;;  INPUT: 	rdi	 = a.n
+	;; 	   	rsi  	 = r.n (result limbs)
+	;;  INTERNAL:	rdx:rax  = multiplication accumulator
+	;; 		r9:r8    = c
+	;; 		r10-r13  = t0-t3
+	;; 		r14	 = a.n[0] / t4
+	;; 		r15	 = a.n[1] / t5
+	;; 		rbx	 = a.n[2] / t6
+	;; 		rcx	 = a.n[3] / t7
+	;; 		rbp	 = 0FFFFFFFFFFFFFh / t8
+	;; 		rsi	 = a.n[4] / t9
+	GLOBAL SYM(secp256k1_fe_sqr_inner)
+	ALIGN 32
+SYM(secp256k1_fe_sqr_inner):	; in: rdi = a limbs, rsi = result limbs
+	push rbp		; save the registers that are overwritten below
+	push rbx
+	push r12
+	push r13
+	push r14
+	push r15
+	push rsi		; keep the result pointer: rsi is reused for a.n[4] / t9
+	mov rbp,0FFFFFFFFFFFFFh	; rbp = 52-bit limb mask
+	
+	;; c=a.n[0] * a.n[0]
+   	mov r14,[rdi+0*8]	; r14=a.n[0]
+	mov r10,rbp		; limb mask for t0
+	mov rax,r14
+	mul rax
+	mov r15,[rdi+1*8]	; a.n[1]
+	add r14,r14		; r14=2*a.n[0]
+	mov r8,rax
+	and r10,rax		; t0 = low 52 bits of c
+	shrd r8,rdx,52
+	xor r9,r9
+
+	;; c+=2*a.n[0] * a.n[1]
+	mov rax,r14		; r14=2*a.n[0]
+	mul r15
+	mov rbx,[rdi+2*8]	; rbx=a.n[2]
+	mov r11,rbp 		; limb mask for t1
+	add r8,rax
+	adc r9,rdx
+	and r11,r8
+	shrd r8,r9,52
+	xor r9,r9
+	
+	;; c+=2*a.n[0]*a.n[2]+a.n[1]*a.n[1]
+	mov rax,r14
+	mul rbx
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,r15
+	mov r12,rbp		; limb mask for t2
+	mul rax
+	mov rcx,[rdi+3*8]	; rcx=a.n[3]
+	add r15,r15		; r15=a.n[1]*2
+	add r8,rax
+	adc r9,rdx
+	and r12,r8		; t2 = low 52 bits of c
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=2*a.n[0]*a.n[3]+2*a.n[1]*a.n[2]
+	mov rax,r14
+	mul rcx
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,r15		; rax=2*a.n[1]
+	mov r13,rbp		; limb mask for t3
+	mul rbx
+	mov rsi,[rdi+4*8]	; rsi=a.n[4]
+	add r8,rax
+	adc r9,rdx
+	and r13,r8
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=2*a.n[0]*a.n[4]+2*a.n[1]*a.n[3]+a.n[2]*a.n[2]
+	mov rax,r14		; last time we need 2*a.n[0]
+	mul rsi
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,r15
+	mul rcx
+	mov r14,rbp		; limb mask for t4
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,rbx
+	mul rax
+	add rbx,rbx		; rbx=2*a.n[2]
+	add r8,rax
+	adc r9,rdx
+	and r14,r8
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=2*a.n[1]*a.n[4]+2*a.n[2]*a.n[3]
+	mov rax,r15		; last time we need 2*a.n[1]
+	mul rsi
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,rbx
+	mul rcx
+	mov r15,rbp		; limb mask for t5
+	add r8,rax
+	adc r9,rdx
+	and r15,r8
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=2*a.n[2]*a.n[4]+a.n[3]*a.n[3]
+	mov rax,rbx		; last time we need 2*a.n[2]
+	mul rsi
+	add r8,rax
+	adc r9,rdx
+
+	mov rax,rcx		; a.n[3]
+	mul rax
+	mov rbx,rbp		; limb mask for t6
+	add r8,rax
+	adc r9,rdx
+	and rbx,r8		; t6 = low 52 bits of c
+	lea rax,[2*rcx]		; rax=2*a.n[3], consumed by the mul below
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=2*a.n[3]*a.n[4]
+	mul rsi
+	mov rcx,rbp		; limb mask for t7
+	add r8,rax
+	adc r9,rdx
+	and rcx,r8		; t7 = low 52 bits of c
+	shrd r8,r9,52
+	xor r9,r9		
+
+	;; c+=a.n[4]*a.n[4]
+	mov rax,rsi
+	mul rax
+	;; mov rbp,rbp		; limb mask is already there!
+	add r8,rax
+	adc r9,rdx
+	and rbp,r8 		; t8 = low 52 bits of c (overwrites the mask)
+	shrd r8,r9,52
+	xor r9,r9		
+
+	mov rsi,r8		; remaining c becomes t9
+
+	;; *******************************************************
+	jmp common_exit_norm	; reduce, store and return through the common_exit_norm tail
+	end
+
+	
diff --git a/src/field_5x52_asm_impl.h b/src/field_5x52_asm_impl.h
new file mode 100644
index 0000000000..f29605b11b
--- /dev/null
+++ b/src/field_5x52_asm_impl.h
@@ -0,0 +1,13 @@
+/**********************************************************************
+ * Copyright (c) 2013 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
+#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
+
+void __attribute__ ((sysv_abi)) secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r); /* defined in field_5x52_asm.asm; a, b, r each point at 5 limbs */
+void __attribute__ ((sysv_abi)) secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r); /* defined in field_5x52_asm.asm; sysv_abi matches the rdi/rsi/rdx argument registers the assembly reads */
+
+#endif
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
new file mode 100644
index 0000000000..d1b06d05a4
--- /dev/null
+++ b/src/field_5x52_impl.h
@@ -0,0 +1,260 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
+#define _SECP256K1_FIELD_REPR_IMPL_H_
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include <string.h>
+#include "util.h"
+#include "num.h"
+#include "field.h"
+
+#if defined(USE_FIELD_5X52_ASM)
+#include "field_5x52_asm_impl.h"
+#elif defined(USE_FIELD_5X52_INT128)
+#include "field_5x52_int128_impl.h"
+#else
+#error "Please select field_5x52 implementation"
+#endif
+
+/** Implements arithmetic modulo FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F,
+ *  represented as 5 uint64_t's in base 2^52. The limbs are allowed to hold more than 52 bits each. In
+ *  particular, each field element has a 'magnitude' associated with it. Internally, a magnitude M means
+ *  each limb is at most M*(2^53-1), except the most significant one, which is limited to M*(2^49-1). Each
+ *  operation accepts inputs up to some maximum magnitude, and has its own rule for propagating magnitude
+ *  to its output.
+ */
+
+static void secp256k1_fe_inner_start(void) {} /* no-op: empty body, nothing is set up here */
+static void secp256k1_fe_inner_stop(void) {}  /* no-op: empty body, nothing is torn down here */
+
+#ifdef VERIFY
+/* Check that every limb of a respects the bound implied by its magnitude/normalized flags. */
+static void secp256k1_fe_verify(const secp256k1_fe_t *a) {
+    const uint64_t *limb = a->n;
+    int scale = a->normalized ? 1 : 2 * a->magnitude;
+    int ok = (a->magnitude >= 0);
+    for (int i = 0; i < 4; i++) {
+        ok &= (limb[i] <= 0xFFFFFFFFFFFFFULL * scale);
+    }
+    ok &= (limb[4] <= 0x0FFFFFFFFFFFFULL * scale);
+    if (a->normalized) {
+        ok &= (a->magnitude <= 1);
+        /* A normalized value must additionally be strictly below the field prime. */
+        if (ok && (limb[4] == 0x0FFFFFFFFFFFFULL) && ((limb[3] & limb[2] & limb[1]) == 0xFFFFFFFFFFFFFULL)) {
+            ok &= (limb[0] < 0xFFFFEFFFFFC2FULL);
+        }
+    }
+    VERIFY_CHECK(ok == 1);
+}
+#else
+/* Without VERIFY the check compiles to nothing. */
+static void secp256k1_fe_verify(const secp256k1_fe_t *a) { (void)a; }
+#endif
+/** Reduce r in place to its normalized form: limbs within their nominal widths and the value below the field prime. */
+static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+    uint64_t m;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x1000003D1ULL; /* 2^256 is congruent to 0x1000003D1 modulo the field prime */
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; m = t1;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; m &= t2;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; m &= t3;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
+    x = (t4 >> 48) | ((t4 == 0x0FFFFFFFFFFFFULL) & (m == 0xFFFFFFFFFFFFFULL)
+        & (t0 >= 0xFFFFEFFFFFC2FULL));
+
+    /* Apply the final reduction (for constant-time behaviour, we do it always) */
+    t0 += x * 0x1000003D1ULL;
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
+
+    /* If t4 didn't carry to bit 48 already, then it should have after any final reduction */
+    VERIFY_CHECK(t4 >> 48 == x);
+
+    /* Mask off the possible multiple of 2^256 from the final reduction */
+    t4 &= 0x0FFFFFFFFFFFFULL;
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+/** Set r to the small integer a: limb 0 receives a, the four upper limbs are zeroed. */
+SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
+    r->n[4] = r->n[3] = r->n[2] = r->n[1] = 0;
+    r->n[0] = a;
+#ifdef VERIFY
+    r->normalized = 1;
+    r->magnitude = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+/** Return 1 iff all five limbs of a are zero. VERIFY builds assert that a is normalized. */
+SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    secp256k1_fe_verify(a);
+#endif
+    uint64_t acc = a->n[0] | a->n[1] | a->n[2] | a->n[3] | a->n[4];
+    return acc == 0;
+}
+/** Return the lowest bit of limb 0 (1 if odd, 0 if even). VERIFY builds assert that a is normalized. */
+SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    secp256k1_fe_verify(a);
+#endif
+    return a->n[0] & 1;
+}
+/** Zero every limb of a; VERIFY builds flag the result as normalized with magnitude 0. */
+SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) {
+#ifdef VERIFY
+    a->normalized = 1;
+    a->magnitude = 0;
+#endif
+    a->n[0] = a->n[1] = 0;
+    a->n[2] = a->n[3] = 0;
+    a->n[4] = 0;
+}
+/** Return 1 iff a and b have identical limbs. VERIFY builds assert that both inputs are normalized. */
+SECP256K1_INLINE static int secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    VERIFY_CHECK(b->normalized);
+    secp256k1_fe_verify(a);
+    secp256k1_fe_verify(b);
+#endif
+    uint64_t diff = (a->n[0] ^ b->n[0]) | (a->n[1] ^ b->n[1]) | (a->n[2] ^ b->n[2]) | (a->n[3] ^ b->n[3]) | (a->n[4] ^ b->n[4]);
+    return diff == 0;
+}
+/** Load the 32-byte big-endian value a into r, four bits at a time. No reduction is performed here;
+ *  VERIFY builds flag the result as normalized and check it accordingly. */
+static void secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
+    for (int limb = 0; limb < 5; limb++) r->n[limb] = 0;
+    /* Nibble k (k = 0 is the least significant) lands at bit 4*k of the 5x52-bit limb array. */
+    for (int k = 0; k < 64; k++) {
+        int bitpos = 4 * k;
+        uint64_t nibble = (a[31 - k / 2] >> (4 * (k % 2))) & 0xF;
+        r->n[bitpos / 52] |= nibble << (bitpos % 52);
+    }
+#ifdef VERIFY
+    r->normalized = 1;
+    r->magnitude = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+/** Serialize the field element a as 32 big-endian bytes into r. Requires the input to be normalized. */
+static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->normalized);
+    secp256k1_fe_verify(a);
+#endif
+    /* Walk the output from its least significant byte (r[31]) to its most significant (r[0]). */
+    for (int byte = 0; byte < 32; byte++) {
+        /* Each byte is assembled from two 4-bit pieces read at bit offsets 8*byte and 8*byte+4. */
+        int lo_pos = 8 * byte;
+        int hi_pos = 8 * byte + 4;
+        int lo = (a->n[lo_pos / 52] >> (lo_pos % 52)) & 0xF;
+        int hi = (a->n[hi_pos / 52] >> (hi_pos % 52)) & 0xF;
+        r[31 - byte] = lo | (hi << 4);
+    }
+}
+/** Set r = 2*(m+1)*p - a limb by limb, i.e. -a modulo p. VERIFY builds require a->magnitude <= m and give r magnitude m + 1. */
+SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= m);
+    secp256k1_fe_verify(a);
+#endif
+    r->n[0] = 2 * (uint64_t)(m + 1) * 0xFFFFEFFFFFC2FULL - a->n[0];
+    r->n[1] = 2 * (uint64_t)(m + 1) * 0xFFFFFFFFFFFFFULL - a->n[1];
+    r->n[2] = 2 * (uint64_t)(m + 1) * 0xFFFFFFFFFFFFFULL - a->n[2];
+    r->n[3] = 2 * (uint64_t)(m + 1) * 0xFFFFFFFFFFFFFULL - a->n[3];
+    r->n[4] = 2 * (uint64_t)(m + 1) * 0x0FFFFFFFFFFFFULL - a->n[4];
+#ifdef VERIFY
+    r->normalized = 0;
+    r->magnitude = m + 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+/** Multiply r in place by the small integer a. VERIFY builds scale the tracked magnitude by the same
+ *  factor and clear the normalized flag. */
+SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) {
+    /* Each limb is scaled independently; no carries are propagated between limbs here. */
+    for (int limb = 0; limb < 5; limb++) {
+        r->n[limb] *= a;
+    }
+#ifdef VERIFY
+    r->normalized = 0;
+    r->magnitude *= a;
+    secp256k1_fe_verify(r);
+#endif
+}
+/** Add a to r in place. VERIFY builds add the tracked magnitudes and clear r's normalized flag,
+ *  then re-check r's limb bounds. */
+SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+#ifdef VERIFY
+    secp256k1_fe_verify(a);
+#endif
+    /* Limb-wise sum; no carry propagation or modular reduction happens here. */
+    for (int limb = 0; limb < 5; limb++) {
+        r->n[limb] += a->n[limb];
+    }
+#ifdef VERIFY
+    r->normalized = 0;
+    r->magnitude += a->magnitude;
+    secp256k1_fe_verify(r);
+#endif
+}
+/** Set r = a * b via secp256k1_fe_mul_inner. VERIFY builds require input magnitudes <= 8 and flag r as magnitude 1, not normalized. */
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= 8);
+    VERIFY_CHECK(b->magnitude <= 8);
+    secp256k1_fe_verify(a);
+    secp256k1_fe_verify(b);
+#endif
+    secp256k1_fe_mul_inner(a->n, b->n, r->n);
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+/** Set r = a * a via secp256k1_fe_sqr_inner. VERIFY builds require input magnitude <= 8 and flag r as magnitude 1, not normalized. */
+static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+#ifdef VERIFY
+    VERIFY_CHECK(a->magnitude <= 8);
+    secp256k1_fe_verify(a);
+#endif
+    secp256k1_fe_sqr_inner(a->n, r->n);
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 0;
+    secp256k1_fe_verify(r);
+#endif
+}
+
+#endif
diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h
new file mode 100644
index 0000000000..c476428672
--- /dev/null
+++ b/src/field_5x52_int128_impl.h
@@ -0,0 +1,279 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
+#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
+
+#include <stdint.h>
+/* VERIFY_BITS(x, n): in VERIFY builds, assert that x has no bits set at position n or above; otherwise a no-op. */
+#ifdef VERIFY
+#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
+#else
+#define VERIFY_BITS(x, n) do { } while(0)
+#endif
+/** Multiply the 5x52 limb arrays a and b into r. Inputs: up to 56 bits per limb (52 in a[4]/b[4]); output: 52 bits per limb (49 in r[4]). */
+SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) {
+    VERIFY_BITS(a[0], 56);
+    VERIFY_BITS(a[1], 56);
+    VERIFY_BITS(a[2], 56);
+    VERIFY_BITS(a[3], 56);
+    VERIFY_BITS(a[4], 52);
+    VERIFY_BITS(b[0], 56);
+    VERIFY_BITS(b[1], 56);
+    VERIFY_BITS(b[2], 56);
+    VERIFY_BITS(b[3], 56);
+    VERIFY_BITS(b[4], 52);
+
+    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; /* M: 52-bit limb mask; R = 0x1000003D1 << 4, the weight of a 6th limb (2^260) after reduction */
+    /*  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
+     *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
+     *  Note that [x 0 0 0 0 0] = [x*R].
+     */
+
+    __int128 c, d;
+
+    d  = (__int128)a[0] * b[3]
+       + (__int128)a[1] * b[2]
+       + (__int128)a[2] * b[1]
+       + (__int128)a[3] * b[0];
+    VERIFY_BITS(d, 114);
+    /* [d 0 0 0] = [p3 0 0 0] */
+    c  = (__int128)a[4] * b[4];
+    VERIFY_BITS(c, 112);
+    /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    d += (c & M) * R; c >>= 52;
+    VERIFY_BITS(d, 115);
+    VERIFY_BITS(c, 60);
+    /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    uint64_t t3 = d & M; d >>= 52;
+    VERIFY_BITS(t3, 52);
+    VERIFY_BITS(d, 63);
+    /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+
+    d += (__int128)a[0] * b[4]
+       + (__int128)a[1] * b[3]
+       + (__int128)a[2] * b[2]
+       + (__int128)a[3] * b[1]
+       + (__int128)a[4] * b[0];
+    VERIFY_BITS(d, 115);
+    /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    d += c * R;
+    VERIFY_BITS(d, 116);
+    /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    uint64_t t4 = d & M; d >>= 52;
+    VERIFY_BITS(t4, 52);
+    VERIFY_BITS(d, 64);
+    /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    uint64_t tx = (t4 >> 48); t4 &= (M >> 4);
+    VERIFY_BITS(tx, 4);
+    VERIFY_BITS(t4, 48);
+    /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+
+    c  = (__int128)a[0] * b[0];
+    VERIFY_BITS(c, 112);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
+    d += (__int128)a[1] * b[4]
+       + (__int128)a[2] * b[3]
+       + (__int128)a[3] * b[2]
+       + (__int128)a[4] * b[1];
+    VERIFY_BITS(d, 115);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    uint64_t u0 = d & M; d >>= 52;
+    VERIFY_BITS(u0, 52);
+    VERIFY_BITS(d, 63);
+    /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = (u0 << 4) | tx;
+    VERIFY_BITS(u0, 56);
+    /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    c += (__int128)u0 * (R >> 4);
+    VERIFY_BITS(c, 115);
+    /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    uint64_t t0 = c & M; c >>= 52;
+    VERIFY_BITS(t0, 52);
+    VERIFY_BITS(c, 61);
+    /* [d 0 t4 t3 0 c t0] = [p8 0 0 p5 p4 p3 0 0 p0] */
+
+    c += (__int128)a[0] * b[1]
+       + (__int128)a[1] * b[0];
+    VERIFY_BITS(c, 114);
+    /* [d 0 t4 t3 0 c t0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
+    d += (__int128)a[2] * b[4]
+       + (__int128)a[3] * b[3]
+       + (__int128)a[4] * b[2];
+    VERIFY_BITS(d, 114);
+    /* [d 0 t4 t3 0 c t0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    c += (d & M) * R; d >>= 52;
+    VERIFY_BITS(c, 115);
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 t4 t3 0 c t0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    uint64_t t1 = c & M; c >>= 52;
+    VERIFY_BITS(t1, 52);
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 t4 t3 c t1 t0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+
+    c += (__int128)a[0] * b[2]
+       + (__int128)a[1] * b[1]
+       + (__int128)a[2] * b[0];
+    VERIFY_BITS(c, 114);
+    /* [d 0 0 t4 t3 c t1 t0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
+    d += (__int128)a[3] * b[4]
+       + (__int128)a[4] * b[3];
+    VERIFY_BITS(d, 114);
+    /* [d 0 0 t4 t3 c t1 t0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c += (d & M) * R; d >>= 52;
+    VERIFY_BITS(c, 115);
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 t4 t3 c t1 t0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    r[0] = t0;
+    VERIFY_BITS(r[0], 52);
+    /* [d 0 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[1] = t1;
+    VERIFY_BITS(r[1], 52);
+    /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[2] = c & M; c >>= 52;
+    VERIFY_BITS(r[2], 52);
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += d * R + t3;;
+    VERIFY_BITS(c, 100);
+    /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[3] = c & M; c >>= 52;
+    VERIFY_BITS(r[3], 52);
+    VERIFY_BITS(c, 48);
+    /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += t4;
+    VERIFY_BITS(c, 49);
+    /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = c;
+    VERIFY_BITS(r[4], 49);
+    /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+/** Square the 5x52 limb array a into r. Input: up to 56 bits per limb (52 in a[4]); output: 52 bits per limb (49 in r[4]). */
+SECP256K1_INLINE static void secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r) {
+    VERIFY_BITS(a[0], 56);
+    VERIFY_BITS(a[1], 56);
+    VERIFY_BITS(a[2], 56);
+    VERIFY_BITS(a[3], 56);
+    VERIFY_BITS(a[4], 52);
+
+    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; /* M: 52-bit limb mask; R = 0x1000003D1 << 4, the weight of a 6th limb (2^260) after reduction */
+    /**  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
+     *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
+     *  Note that [x 0 0 0 0 0] = [x*R].
+     */
+
+    __int128 c, d;
+
+    uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4]; /* local copies: a0 and a4 are doubled in place further down */
+
+    d  = (__int128)(a0*2) * a3
+       + (__int128)(a1*2) * a2;
+    VERIFY_BITS(d, 114);
+    /* [d 0 0 0] = [p3 0 0 0] */
+    c  = (__int128)a4 * a4;
+    VERIFY_BITS(c, 112);
+    /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    d += (c & M) * R; c >>= 52;
+    VERIFY_BITS(d, 115);
+    VERIFY_BITS(c, 60);
+    /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+    uint64_t t3 = d & M; d >>= 52;
+    VERIFY_BITS(t3, 52);
+    VERIFY_BITS(d, 63);
+    /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
+
+    a4 *= 2;
+    d += (__int128)a0 * a4
+       + (__int128)(a1*2) * a3
+       + (__int128)a2 * a2;
+    VERIFY_BITS(d, 115);
+    /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    d += c * R;
+    VERIFY_BITS(d, 116);
+    /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    uint64_t t4 = d & M; d >>= 52;
+    VERIFY_BITS(t4, 52);
+    VERIFY_BITS(d, 64);
+    /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+    uint64_t tx = (t4 >> 48); t4 &= (M >> 4);
+    VERIFY_BITS(tx, 4);
+    VERIFY_BITS(t4, 48);
+    /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
+
+    c  = (__int128)a0 * a0;
+    VERIFY_BITS(c, 112);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
+    d += (__int128)a1 * a4
+       + (__int128)(a2*2) * a3;
+    VERIFY_BITS(d, 114);
+    /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    uint64_t u0 = d & M; d >>= 52;
+    VERIFY_BITS(u0, 52);
+    VERIFY_BITS(d, 62);
+    /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    u0 = (u0 << 4) | tx;
+    VERIFY_BITS(u0, 56);
+    /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    c += (__int128)u0 * (R >> 4);
+    VERIFY_BITS(c, 113);
+    /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
+    r[0] = c & M; c >>= 52;
+    VERIFY_BITS(r[0], 52);
+    VERIFY_BITS(c, 61);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
+
+    a0 *= 2;
+    c += (__int128)a0 * a1;
+    VERIFY_BITS(c, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
+    d += (__int128)a2 * a4
+       + (__int128)a3 * a3;
+    VERIFY_BITS(d, 114);
+    /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    c += (d & M) * R; d >>= 52;
+    VERIFY_BITS(c, 115);
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+    r[1] = c & M; c >>= 52;
+    VERIFY_BITS(r[1], 52);
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
+
+    c += (__int128)a0 * a2
+       + (__int128)a1 * a1;
+    VERIFY_BITS(c, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
+    d += (__int128)a3 * a4;
+    VERIFY_BITS(d, 114);
+    /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c += (d & M) * R; d >>= 52;
+    VERIFY_BITS(c, 115);
+    VERIFY_BITS(d, 62);
+    /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[2] = c & M; c >>= 52;
+    VERIFY_BITS(r[2], 52);
+    VERIFY_BITS(c, 63);
+    /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+
+    c   += d * R + t3;;
+    VERIFY_BITS(c, 100);
+    /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[3] = c & M; c >>= 52;
+    VERIFY_BITS(r[3], 52);
+    VERIFY_BITS(c, 48);
+    /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    c   += t4;
+    VERIFY_BITS(c, 49);
+    /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+    r[4] = c;
+    VERIFY_BITS(r[4], 49);
+    /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
+}
+
+#endif
diff --git a/src/field_gmp.h b/src/field_gmp.h
new file mode 100644
index 0000000000..b390fd9de8
--- /dev/null
+++ b/src/field_gmp.h
@@ -0,0 +1,18 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_REPR_
+#define _SECP256K1_FIELD_REPR_
+
+#include <gmp.h>
+
+#define FIELD_LIMBS ((256 + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS)
+/* FIELD_LIMBS limbs cover 256 bits; the extra top limb n[FIELD_LIMBS] is used by field_gmp_impl.h to hold overflow. */
+typedef struct {
+    mp_limb_t n[FIELD_LIMBS+1];
+} secp256k1_fe_t;
+
+#endif
diff --git a/src/field_gmp_impl.h b/src/field_gmp_impl.h
new file mode 100644
index 0000000000..af4728e5b4
--- /dev/null
+++ b/src/field_gmp_impl.h
@@ -0,0 +1,163 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
+#define _SECP256K1_FIELD_REPR_IMPL_H_
+
+#include <stdio.h>
+#include <string.h>
+#include "num.h"
+#include "field.h"
+/* Filled in by secp256k1_fe_inner_start: pc = 2^32 + 0x3D1, and p = -pc modulo 2^(FIELD_LIMBS*GMP_NUMB_BITS). */
+static mp_limb_t secp256k1_field_p[FIELD_LIMBS];
+static mp_limb_t secp256k1_field_pc[(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS];
+/* Build the two constant tables: secp256k1_field_pc = 2^32 + 0x3D1 and secp256k1_field_p = 0 - secp256k1_field_pc. */
+static void secp256k1_fe_inner_start(void) {
+    const int pc_limbs = (33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS;
+    int k;
+    for (k = 0; k < pc_limbs; k++) { secp256k1_field_pc[k] = 0; }
+    for (k = 0; k < FIELD_LIMBS; k++) { secp256k1_field_p[k] = 0; }
+    secp256k1_field_pc[0] += 0x3D1UL;
+    secp256k1_field_pc[32/GMP_NUMB_BITS] += (((mp_limb_t)1) << (32 % GMP_NUMB_BITS));
+    /* Subtract pc from an all-zero FIELD_LIMBS-limb number; the borrow out of the top limb is dropped. */
+    mpn_sub(secp256k1_field_p, secp256k1_field_p, FIELD_LIMBS, secp256k1_field_pc, pc_limbs);
+}
+/* Counterpart of secp256k1_fe_inner_start; the body is empty, so nothing is released here. */
+static void secp256k1_fe_inner_stop(void) {
+}
+/** Reduce r in place: fold the overflow limb back into the low limbs, then subtract p once if the result is still >= p. */
+static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
+    if (r->n[FIELD_LIMBS] != 0) {
+#if (GMP_NUMB_BITS >= 40)
+        mp_limb_t carry = mpn_add_1(r->n, r->n, FIELD_LIMBS, 0x1000003D1ULL * r->n[FIELD_LIMBS]); /* overflow limb times (2^32 + 0x3D1) */
+        mpn_add_1(r->n, r->n, FIELD_LIMBS, 0x1000003D1ULL * carry); /* fold the carry of that addition the same way */
+#else
+        mp_limb_t carry = mpn_add_1(r->n, r->n, FIELD_LIMBS, 0x3D1UL * r->n[FIELD_LIMBS]) +
+                          mpn_add_1(r->n+(32/GMP_NUMB_BITS), r->n+(32/GMP_NUMB_BITS), FIELD_LIMBS-(32/GMP_NUMB_BITS), r->n[FIELD_LIMBS] << (32 % GMP_NUMB_BITS));
+        mpn_add_1(r->n, r->n, FIELD_LIMBS, 0x3D1UL * carry);
+        mpn_add_1(r->n+(32/GMP_NUMB_BITS), r->n+(32/GMP_NUMB_BITS), FIELD_LIMBS-(32/GMP_NUMB_BITS), carry << (32%GMP_NUMB_BITS));
+#endif
+        r->n[FIELD_LIMBS] = 0;
+    }
+    if (mpn_cmp(r->n, secp256k1_field_p, FIELD_LIMBS) >= 0) /* compares only the low FIELD_LIMBS limbs against p */
+        mpn_sub(r->n, r->n, FIELD_LIMBS, secp256k1_field_p, FIELD_LIMBS);
+}
+/** Set r to the small integer a: limb 0 receives a, every other limb is zeroed. */
+SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
+    /* Clear limbs FIELD_LIMBS down to 1 (this includes the overflow limb), then store the value. */
+    for (int limb = FIELD_LIMBS; limb > 0; limb--) { r->n[limb] = 0; }
+    r->n[0] = a;
+}
+/** Zero all FIELD_LIMBS+1 limbs of r. */
+SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *r) {
+    int limb = FIELD_LIMBS + 1;
+    while (limb-- > 0) { r->n[limb] = 0; }
+}
+/** Return 1 iff all FIELD_LIMBS+1 limbs of a are zero. */
+SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
+    mp_limb_t acc = 0;
+    /* OR all limbs together; the loop always runs to the end, with no early exit. */
+    for (int i = 0; i < FIELD_LIMBS+1; i++) { acc |= a->n[i]; }
+    return acc == 0;
+}
+/** Return the lowest bit of limb 0 (1 if odd, 0 if even). */
+SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
+    return a->n[0] & 1;
+}
+/** Return 1 iff a and b agree in all FIELD_LIMBS+1 limbs. */
+SECP256K1_INLINE static int secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+    mp_limb_t diff = 0;
+    /* Accumulate the XOR of every limb pair; the loop always runs to the end, with no early exit. */
+    for (int i = 0; i < FIELD_LIMBS+1; i++) { diff |= a->n[i] ^ b->n[i]; }
+    return diff == 0;
+}
+/** Load the 32-byte big-endian value a into r, one bit at a time. All limbs (including the
+ *  overflow limb) are cleared first; no modular reduction is performed here. */
+static void secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
+    for (int limb = 0; limb <= FIELD_LIMBS; limb++) { r->n[limb] = 0; }
+    for (int bit = 0; bit < 256; bit++) {
+        /* Bit 0 is the least significant bit of the last input byte, a[31]. */
+        mp_limb_t v = (a[31 - bit / 8] >> (bit % 8)) & 0x1;
+        r->n[bit / GMP_NUMB_BITS] |= v << (bit % GMP_NUMB_BITS);
+    }
+}
+
+/** Serialize the field element a as 32 big-endian bytes into r. Requires the input to be normalized. */
+static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
+    for (int byte = 0; byte < 32; byte++) {
+        int acc = 0;
+        for (int bit = 8 * byte; bit < 8 * byte + 8; bit++) {
+            /* Fetch one bit of the value and place it at the matching position of the output byte. */
+            int v = (a->n[bit / GMP_NUMB_BITS] >> (bit % GMP_NUMB_BITS)) & 0x1;
+            acc |= v << (bit - 8 * byte);
+        }
+        r[31 - byte] = acc;
+    }
+}
+/** Set r = p - a, computed from a normalized copy of a. The magnitude argument m is unused here. */
+SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) {
+    (void)m;
+    *r = *a;
+    secp256k1_fe_normalize(r);
+    for (int i=0; i<FIELD_LIMBS; i++)
+        r->n[i] = ~(r->n[i]); /* bitwise complement of the low FIELD_LIMBS limbs: (all ones) - r */
+#if (GMP_NUMB_BITS >= 33)
+    mpn_sub_1(r->n, r->n, FIELD_LIMBS, 0x1000003D0ULL); /* (all ones) - 0x1000003D0 equals p, so this leaves p - r */
+#else
+    mpn_sub_1(r->n, r->n, FIELD_LIMBS, 0x3D0UL);
+    mpn_sub_1(r->n+(32/GMP_NUMB_BITS), r->n+(32/GMP_NUMB_BITS), FIELD_LIMBS-(32/GMP_NUMB_BITS), 0x1UL << (32%GMP_NUMB_BITS));
+#endif
+}
+/** Multiply all FIELD_LIMBS+1 limbs of r by a in place; the carry-out returned by mpn_mul_1 is discarded. */
+SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) {
+    mpn_mul_1(r->n, r->n, FIELD_LIMBS+1, a);
+}
+/** Add a to r in place across all FIELD_LIMBS+1 limbs; the carry-out returned by mpn_add is discarded. */
+SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+    mpn_add(r->n, r->n, FIELD_LIMBS+1, a->n, FIELD_LIMBS+1);
+}
+/** Reduce the double-width value tmp (2*FIELD_LIMBS limbs, modified in place) into r; a final carry lands in r's overflow limb. */
+static void secp256k1_fe_reduce(secp256k1_fe_t *r, mp_limb_t *tmp) {
+    /** <A1 A2 A3 A4> <B1 B2 B3 B4>
+     *        B1 B2 B3 B4
+     *  + C * A1 A2 A3 A4
+     *  +  A1 A2 A3 A4
+     */
+
+#if (GMP_NUMB_BITS >= 33)
+    mp_limb_t o = mpn_addmul_1(tmp, tmp+FIELD_LIMBS, FIELD_LIMBS, 0x1000003D1ULL); /* low half += high half * (2^32 + 0x3D1); o is the carry-out limb */
+#else
+    mp_limb_t o = mpn_addmul_1(tmp, tmp+FIELD_LIMBS, FIELD_LIMBS, 0x3D1UL) +
+                  mpn_addmul_1(tmp+(32/GMP_NUMB_BITS), tmp+FIELD_LIMBS, FIELD_LIMBS-(32/GMP_NUMB_BITS), 0x1UL << (32%GMP_NUMB_BITS));
+#endif
+    mp_limb_t q[1+(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS];
+    q[(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS] = mpn_mul_1(q, secp256k1_field_pc, (33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS, o); /* q = o * pc, folding the carry-out once more */
+#if (GMP_NUMB_BITS <= 32)
+    mp_limb_t o2 = tmp[2*FIELD_LIMBS-(32/GMP_NUMB_BITS)] << (32%GMP_NUMB_BITS);
+    q[(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS] += mpn_addmul_1(q, secp256k1_field_pc, (33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS, o2);
+#endif
+    r->n[FIELD_LIMBS] = mpn_add(r->n, tmp, FIELD_LIMBS, q, 1+(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS); /* r = low half + q; carry goes to the overflow limb */
+}
+/** Set r = a * b: multiply normalized copies of the inputs and reduce the double-width product. */
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+    /* Work on copies so the caller's operands are left untouched by the normalization. */
+    mp_limb_t wide[2*FIELD_LIMBS];
+    secp256k1_fe_t lhs = *a, rhs = *b;
+    secp256k1_fe_normalize(&lhs);
+    secp256k1_fe_normalize(&rhs);
+    mpn_mul_n(wide, lhs.n, rhs.n, FIELD_LIMBS);
+    secp256k1_fe_reduce(r, wide);
+}
+/** Set r = a * a: square a normalized copy of the input and reduce the double-width result. */
+static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+    mp_limb_t wide[2*FIELD_LIMBS];
+    secp256k1_fe_t sq = *a;
+    secp256k1_fe_normalize(&sq);
+    mpn_sqr(wide, sq.n, FIELD_LIMBS);
+    secp256k1_fe_reduce(r, wide);
+}
+
+#endif
diff --git a/src/field_impl.h b/src/field_impl.h
new file mode 100644
index 0000000000..3a31e1844e
--- /dev/null
+++ b/src/field_impl.h
@@ -0,0 +1,293 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_FIELD_IMPL_H_
+#define _SECP256K1_FIELD_IMPL_H_
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include "util.h"
+
+#if defined(USE_FIELD_GMP)
+#include "field_gmp_impl.h"
+#elif defined(USE_FIELD_10X26)
+#include "field_10x26_impl.h"
+#elif defined(USE_FIELD_5X52)
+#include "field_5x52_impl.h"
+#else
+#error "Please select field implementation"
+#endif
+/** Write a as 64 uppercase hex digits plus a terminating NUL into r. *rlen is always set to 65 (the
+ *  buffer size needed); nothing is written to r if the caller's *rlen was smaller than that. */
+static void secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a) {
+    static const char *digits = "0123456789ABCDEF";
+    int avail = *rlen;
+    *rlen = 65;
+    /* Report the required size either way; only proceed when the caller's buffer is big enough. */
+    if (avail < 65) return;
+    unsigned char bytes[32];
+    secp256k1_fe_t norm = *a;
+    secp256k1_fe_normalize(&norm);
+    secp256k1_fe_get_b32(bytes, &norm);
+    for (int i = 0; i < 32; i++) {
+        r[2*i]     = digits[(bytes[i] >> 4) & 0xF];
+        r[2*i + 1] = digits[(bytes[i]) & 0xF];
+    }
+    r[64] = 0x00;
+}
+/** Parse hex digits from a (alen characters; no terminating NUL is needed) into r as a big-endian
+ *  value. Digits are consumed in pairs, and inputs shorter than 64 digits are right-aligned, i.e.
+ *  zero-padded on the left. Characters that are not hex digits count as 0. A trailing unpaired
+ *  digit and anything past the first 64 characters are ignored, so no read or write can leave the
+ *  bounds of a or of the 32-byte scratch buffer. */
+static void secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen) {
+    unsigned char tmp[32] = {0};
+    /* Character code -> hex value. Codes 128..255 are not listed and are implicitly 0. */
+    static const int cvt[256] = {0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+                                 0, 1, 2, 3, 4, 5, 6,7,8,9,0,0,0,0,0,0,
+                                 0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
+                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+                                 0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
+                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0};
+    int pairs = alen / 2;               /* a trailing unpaired digit is ignored */
+    if (pairs > 32) {
+        pairs = 32;                     /* only the first 64 digits fit into 32 bytes */
+    }
+    for (int i=0; i<pairs; i++) {
+        tmp[32 - pairs + i] = (cvt[(unsigned char)a[2*i]] << 4) + cvt[(unsigned char)a[2*i+1]];
+    }
+    secp256k1_fe_set_b32(r, tmp);
+}
+/** Set r = a^((p + 1)/4) and return 1 iff r*r equals a. The check re-reads a after r is written, so r should not alias a. */
+static int secp256k1_fe_sqrt(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+
+    /** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in
+     *  { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
+     *  1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
+     */
+
+    secp256k1_fe_t x2;
+    secp256k1_fe_sqr(&x2, a);
+    secp256k1_fe_mul(&x2, &x2, a); /* x2 = a^(2^2 - 1); below, each xN likewise holds a^(2^N - 1) */
+
+    secp256k1_fe_t x3;
+    secp256k1_fe_sqr(&x3, &x2);
+    secp256k1_fe_mul(&x3, &x3, a);
+
+    secp256k1_fe_t x6 = x3;
+    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6);
+    secp256k1_fe_mul(&x6, &x6, &x3);
+
+    secp256k1_fe_t x9 = x6;
+    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9);
+    secp256k1_fe_mul(&x9, &x9, &x3);
+
+    secp256k1_fe_t x11 = x9;
+    for (int j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11);
+    secp256k1_fe_mul(&x11, &x11, &x2);
+
+    secp256k1_fe_t x22 = x11;
+    for (int j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22);
+    secp256k1_fe_mul(&x22, &x22, &x11);
+
+    secp256k1_fe_t x44 = x22;
+    for (int j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44);
+    secp256k1_fe_mul(&x44, &x44, &x22);
+
+    secp256k1_fe_t x88 = x44;
+    for (int j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88);
+    secp256k1_fe_mul(&x88, &x88, &x44);
+
+    secp256k1_fe_t x176 = x88;
+    for (int j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176);
+    secp256k1_fe_mul(&x176, &x176, &x88);
+
+    secp256k1_fe_t x220 = x176;
+    for (int j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220);
+    secp256k1_fe_mul(&x220, &x220, &x44);
+
+    secp256k1_fe_t x223 = x220;
+    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223);
+    secp256k1_fe_mul(&x223, &x223, &x3);
+
+    /* The final result is then assembled using a sliding window over the blocks. */
+
+    secp256k1_fe_t t1 = x223;
+    for (int j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1);
+    secp256k1_fe_mul(&t1, &t1, &x22);
+    for (int j=0; j<6; j++) secp256k1_fe_sqr(&t1, &t1);
+    secp256k1_fe_mul(&t1, &t1, &x2);
+    secp256k1_fe_sqr(&t1, &t1);
+    secp256k1_fe_sqr(r, &t1);
+
+    /* Check that a square root was actually calculated */
+
+    secp256k1_fe_sqr(&t1, r);
+    secp256k1_fe_negate(&t1, &t1, 1); /* t1 = -(r*r) */
+    secp256k1_fe_add(&t1, a);         /* t1 = a - r*r */
+    secp256k1_fe_normalize(&t1);
+    return secp256k1_fe_is_zero(&t1);
+}
+/** Set r = a^(p - 2), which is the multiplicative inverse of a when a is nonzero (Fermat's little theorem). */
+static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+
+    /** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
+     *  { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
+     *  [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
+     */
+
+    secp256k1_fe_t x2;
+    secp256k1_fe_sqr(&x2, a);
+    secp256k1_fe_mul(&x2, &x2, a); /* x2 = a^(2^2 - 1); below, each xN likewise holds a^(2^N - 1) */
+
+    secp256k1_fe_t x3;
+    secp256k1_fe_sqr(&x3, &x2);
+    secp256k1_fe_mul(&x3, &x3, a);
+
+    secp256k1_fe_t x6 = x3;
+    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6);
+    secp256k1_fe_mul(&x6, &x6, &x3);
+
+    secp256k1_fe_t x9 = x6;
+    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9);
+    secp256k1_fe_mul(&x9, &x9, &x3);
+
+    secp256k1_fe_t x11 = x9;
+    for (int j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11);
+    secp256k1_fe_mul(&x11, &x11, &x2);
+
+    secp256k1_fe_t x22 = x11;
+    for (int j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22);
+    secp256k1_fe_mul(&x22, &x22, &x11);
+
+    secp256k1_fe_t x44 = x22;
+    for (int j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44);
+    secp256k1_fe_mul(&x44, &x44, &x22);
+
+    secp256k1_fe_t x88 = x44;
+    for (int j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88);
+    secp256k1_fe_mul(&x88, &x88, &x44);
+
+    secp256k1_fe_t x176 = x88;
+    for (int j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176);
+    secp256k1_fe_mul(&x176, &x176, &x88);
+
+    secp256k1_fe_t x220 = x176;
+    for (int j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220);
+    secp256k1_fe_mul(&x220, &x220, &x44);
+
+    secp256k1_fe_t x223 = x220;
+    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223);
+    secp256k1_fe_mul(&x223, &x223, &x3);
+
+    /* The final result is then assembled using a sliding window over the blocks. */
+
+    secp256k1_fe_t t1 = x223;
+    for (int j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1);
+    secp256k1_fe_mul(&t1, &t1, &x22);
+    for (int j=0; j<5; j++) secp256k1_fe_sqr(&t1, &t1);
+    secp256k1_fe_mul(&t1, &t1, a);
+    for (int j=0; j<3; j++) secp256k1_fe_sqr(&t1, &t1);
+    secp256k1_fe_mul(&t1, &t1, &x2);
+    for (int j=0; j<2; j++) secp256k1_fe_sqr(&t1, &t1);
+    secp256k1_fe_mul(r, &t1, a);
+}
+/** Invert a into r, choosing at compile time between secp256k1_fe_inv and the num module's modular inverse. */
+static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+#if defined(USE_FIELD_INV_BUILTIN)
+    secp256k1_fe_inv(r, a);
+#elif defined(USE_FIELD_INV_NUM)
+    unsigned char b[32];
+    secp256k1_fe_t c = *a;
+    secp256k1_fe_normalize(&c);
+    secp256k1_fe_get_b32(b, &c); /* field element -> 32 big-endian bytes */
+    secp256k1_num_t n;
+    secp256k1_num_set_bin(&n, b, 32);
+    secp256k1_num_mod_inverse(&n, &n, &secp256k1_fe_consts->p); /* relies on secp256k1_fe_start having set up secp256k1_fe_consts */
+    secp256k1_num_get_bin(b, 32, &n);
+    secp256k1_fe_set_b32(r, b); /* 32 bytes -> field element */
+#else
+#error "Please select field inverse implementation"
+#endif
+}
+/** Batch inversion: set r[i] = 1/a[i] for every i < len using a single call to secp256k1_fe_inv
+ *  (Montgomery's trick). The arrays r and a must not overlap. Does nothing when len is 0. */
+static void secp256k1_fe_inv_all(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]) {
+    if (len < 1)
+        return;
+    VERIFY_CHECK((r + len <= a) || (a + len <= r));
+
+    /* Forward pass: r[i] = a[0] * a[1] * ... * a[i]. */
+    r[0] = a[0];
+    size_t i = 0;
+    while (++i < len) {
+        secp256k1_fe_mul(&r[i], &r[i - 1], &a[i]);
+    }
+    /* u = inverse of the full product; --i leaves i at len - 1. */
+    secp256k1_fe_t u; secp256k1_fe_inv(&u, &r[--i]);
+    /* Backward pass: peel off one factor per step. j is a size_t so large indices are not truncated. */
+    while (i > 0) {
+        size_t j = i--;
+        secp256k1_fe_mul(&r[j], &r[i], &u);
+        secp256k1_fe_mul(&u, &u, &a[j]);
+    }
+
+    r[0] = u;
+}
+/** Batch inversion: set r[i] = 1/a[i] for every i < len using a single call to secp256k1_fe_inv_var
+ *  (Montgomery's trick). The arrays r and a must not overlap. Does nothing when len is 0. */
+static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]) {
+    if (len < 1)
+        return;
+    VERIFY_CHECK((r + len <= a) || (a + len <= r));
+
+    /* Forward pass: r[i] = a[0] * a[1] * ... * a[i]. */
+    r[0] = a[0];
+    size_t i = 0;
+    while (++i < len) {
+        secp256k1_fe_mul(&r[i], &r[i - 1], &a[i]);
+    }
+    /* u = inverse of the full product; --i leaves i at len - 1. */
+    secp256k1_fe_t u; secp256k1_fe_inv_var(&u, &r[--i]);
+    /* Backward pass: peel off one factor per step. j is a size_t so large indices are not truncated. */
+    while (i > 0) {
+        size_t j = i--;
+        secp256k1_fe_mul(&r[j], &r[i], &u);
+        secp256k1_fe_mul(&u, &u, &a[j]);
+    }
+
+    r[0] = u;
+}
+/** Initialize the field module: run the backend's setup and allocate the global constants (the prime p).
+ *  A call while already initialized is a no-op. Aborts the process if the allocation fails. */
+static void secp256k1_fe_start(void) {
+    static const unsigned char secp256k1_fe_consts_p[] = {
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
+    };
+    if (secp256k1_fe_consts == NULL) {
+        secp256k1_fe_inner_start();
+        secp256k1_fe_consts_t *ret = (secp256k1_fe_consts_t*)malloc(sizeof(secp256k1_fe_consts_t));
+        if (ret == NULL) abort(); /* out of memory: stop here instead of dereferencing NULL below */
+        secp256k1_num_set_bin(&ret->p, secp256k1_fe_consts_p, sizeof(secp256k1_fe_consts_p));
+        secp256k1_fe_consts = ret;
+    }
+}
+/** Tear down the field module: free the global constants, then run the backend's teardown. No-op if not started. */
+static void secp256k1_fe_stop(void) {
+    if (secp256k1_fe_consts == NULL) {
+        return;
+    }
+    free((void*)secp256k1_fe_consts);
+    secp256k1_fe_consts = NULL;
+    secp256k1_fe_inner_stop();
+}
+
+#endif
diff --git a/src/group.h b/src/group.h
new file mode 100644
index 0000000000..ba02549821
--- /dev/null
+++ b/src/group.h
@@ -0,0 +1,128 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_GROUP_
+#define _SECP256K1_GROUP_
+
+#include "num.h"
+#include "field.h"
+
+/** A group element of the secp256k1 curve, in affine coordinates. */
+typedef struct {
+    secp256k1_fe_t x; /* affine X coordinate */
+    secp256k1_fe_t y; /* affine Y coordinate */
+    int infinity; /* whether this represents the point at infinity (secp256k1_ge_set_infinity sets only this flag) */
+} secp256k1_ge_t;
+
+/** A group element of the secp256k1 curve, in jacobian coordinates. */
+typedef struct {
+    secp256k1_fe_t x; /* actual X: x/z^2 */
+    secp256k1_fe_t y; /* actual Y: y/z^3 */
+    secp256k1_fe_t z; /* common denominator; set to 1 when the point is built from affine coordinates */
+    int infinity; /* whether this represents the point at infinity */
+} secp256k1_gej_t;
+
+/** Global constants related to the group; filled in by secp256k1_ge_start(). */
+typedef struct {
+    secp256k1_num_t order; /* the order of the curve (= order of its generator) */
+    secp256k1_num_t half_order; /* half the order of the curve (a copy of order passed through secp256k1_num_shift by 1) */
+    secp256k1_ge_t g; /* the generator point */
+
+#ifdef USE_ENDOMORPHISM
+    /* constants related to secp256k1's efficiently computable endomorphism */
+    secp256k1_fe_t beta; /* x-coordinate multiplier used by secp256k1_gej_mul_lambda */
+    secp256k1_num_t lambda, a1b2, b1, a2; /* a1b2, b1 and a2 are the coefficients used by secp256k1_gej_split_exp_var */
+#endif
+} secp256k1_ge_consts_t;
+
+static const secp256k1_ge_consts_t *secp256k1_ge_consts = NULL; /* NULL until secp256k1_ge_start(), and again after secp256k1_ge_stop() */
+
+/** Initialize the group module (allocates the global constants; a second call is a no-op). */
+static void secp256k1_ge_start(void);
+
+/** De-initialize the group module (frees the global constants; a no-op when not started). */
+static void secp256k1_ge_stop(void);
+
+/** Set a group element equal to the point at infinity. Only the infinity flag is written. */
+static void secp256k1_ge_set_infinity(secp256k1_ge_t *r);
+
+/** Set a group element equal to the point with given X and Y coordinates */
+static void secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y);
+
+/** Set a group element (affine) equal to the point with the given X coordinate, and given oddness
+ *  for Y. Return value indicates whether the result is valid. */
+static int secp256k1_ge_set_xo(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd);
+
+/** Check whether a group element is the point at infinity. */
+static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a);
+
+/** Check whether a group element is valid (i.e., on the curve). The point at infinity is reported as not valid. */
+static int secp256k1_ge_is_valid(const secp256k1_ge_t *a);
+
+/** Set r equal to the inverse of a (same x and infinity flag, negated y). */
+static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a);
+
+/** Get a hex representation of a point. *rlen will be overwritten with the real length; r is left untouched if *rlen was too small. */
+static void secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a);
+
+/** Set a group element equal to another which is given in jacobian coordinates. Note: a is modified too (rescaled so that z = 1). */
+static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a);
+
+/** Set a batch of group elements equal to the inputs given in jacobian coordinates */
+static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t r[len], const secp256k1_gej_t a[len]);
+
+/** Set a group element (jacobian) equal to the point at infinity. */
+static void secp256k1_gej_set_infinity(secp256k1_gej_t *r);
+
+/** Set a group element (jacobian) equal to the point with given X and Y coordinates. */
+static void secp256k1_gej_set_xy(secp256k1_gej_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y);
+
+/** Set a group element (jacobian) equal to another which is given in affine coordinates. */
+static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a);
+
+/** Get the X coordinate of a group element (jacobian), i.e. x/z^2. The infinity flag is not checked. */
+static void secp256k1_gej_get_x_var(secp256k1_fe_t *r, const secp256k1_gej_t *a);
+
+/** Set r equal to the inverse of a (i.e., mirrored around the X axis) */
+static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a);
+
+/** Check whether a group element is the point at infinity. */
+static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a);
+
+/** Set r equal to the double of a. */
+static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a);
+
+/** Set r equal to the sum of a and b. */
+static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b);
+
+/** Set r equal to the sum of a and b (with b given in affine coordinates, and not infinity). */
+static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
+
+/** Set r equal to the sum of a and b (with b given in affine coordinates). This is more efficient
+    than secp256k1_gej_add_var. It is identical to secp256k1_gej_add_ge but without constant-time
+    guarantee, and b is allowed to be infinity. */
+static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
+
+/** Get a hex representation of a point. *rlen will be overwritten with the real length. */
+static void secp256k1_gej_get_hex(char *r, int *rlen, const secp256k1_gej_t *a);
+
+#ifdef USE_ENDOMORPHISM
+/** Set r to be equal to lambda times a, where lambda is chosen in a way such that this is very fast. */
+static void secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a);
+
+/** Find r1 and r2 such that r1+r2*lambda = a, and r1 and r2 are maximum 128 bits long (given that a is
+    not more than 256 bits). */
+static void secp256k1_gej_split_exp_var(secp256k1_num_t *r1, secp256k1_num_t *r2, const secp256k1_num_t *a);
+#endif
+
+/** Clear a secp256k1_gej_t to prevent leaking sensitive information. */
+static void secp256k1_gej_clear(secp256k1_gej_t *r);
+
+/** Clear a secp256k1_ge_t to prevent leaking sensitive information. */
+static void secp256k1_ge_clear(secp256k1_ge_t *r);
+
+
+#endif
diff --git a/src/group_impl.h b/src/group_impl.h
new file mode 100644
index 0000000000..1edbc6e099
--- /dev/null
+++ b/src/group_impl.h
@@ -0,0 +1,519 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_GROUP_IMPL_H_
+#define _SECP256K1_GROUP_IMPL_H_
+
+#include <string.h>
+
+#include "num.h"
+#include "field.h"
+#include "group.h"
+
+static void secp256k1_ge_set_infinity(secp256k1_ge_t *r) {
+    r->infinity = 1; /* only the flag is written; r->x and r->y keep their previous contents */
+}
+
+static void secp256k1_ge_set_xy(secp256k1_ge_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y) {
+    /* Build the finite affine point (x, y); the coordinates are copied, not checked against the curve. */
+    r->x = *x; r->y = *y;
+    r->infinity = 0;
+}
+
+static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a) {
+    return a->infinity; /* the flag is returned as stored */
+}
+
+static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a) {
+    /* r = -a: x and the infinity flag are carried over, y is negated. */
+    *r = *a;
+    /* y is normalized first, then negated with 1 as third argument (presumably its magnitude bound). */
+    secp256k1_fe_normalize(&r->y);
+    secp256k1_fe_negate(&r->y, &r->y, 1);
+}
+
+static void secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a) {
+    /* Writes "(<x hex>,<y hex>)" plus a terminating NUL into r. *rlen is the capacity of r on
+     * entry and the required size (terminator included) on return; if the capacity is too
+     * small, r is not written at all. The infinity flag of a is not examined. */
+    char xhex[65]; int xlen = 65;
+    char yhex[65]; int ylen = 65;
+    secp256k1_fe_get_hex(xhex, &xlen, &a->x);
+    secp256k1_fe_get_hex(yhex, &ylen, &a->y);
+    xlen = strlen(xhex);
+    ylen = strlen(yhex);
+    int need = xlen + ylen + 3 + 1; /* '(' + ',' + ')' + NUL */
+    int have = *rlen;
+    *rlen = need;
+    if (have < need)
+        return;
+    char *out = r;
+    *out++ = '('; memcpy(out, xhex, xlen); out += xlen;
+    *out++ = ','; memcpy(out, yhex, ylen); out += ylen;
+    *out++ = ')'; *out = 0;
+}
+
+static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a) {
+    /* Converts a to affine. Note that a itself is rewritten as well: it ends up as (x/z^2, y/z^3, 1). */
+    secp256k1_fe_t zi2, zi3;
+    r->infinity = a->infinity;
+    secp256k1_fe_inv(&a->z, &a->z);       /* a->z = 1/z   */
+    secp256k1_fe_sqr(&zi2, &a->z);        /* zi2  = 1/z^2 */
+    secp256k1_fe_mul(&zi3, &a->z, &zi2);  /* zi3  = 1/z^3 */
+    secp256k1_fe_mul(&a->x, &a->x, &zi2); secp256k1_fe_mul(&a->y, &a->y, &zi3);
+    secp256k1_fe_set_int(&a->z, 1);
+    r->x = a->x; r->y = a->y;
+}
+
+static void secp256k1_ge_set_gej_var(secp256k1_ge_t *r, secp256k1_gej_t *a) {
+    /* Like secp256k1_ge_set_gej, but returns early for infinity and uses secp256k1_fe_inv_var. */
+    secp256k1_fe_t zi2, zi3;
+    r->infinity = a->infinity;
+    if (a->infinity) {
+        return;                           /* r->x, r->y and a are left untouched */
+    }
+    secp256k1_fe_inv_var(&a->z, &a->z);   /* a->z = 1/z   */
+    secp256k1_fe_sqr(&zi2, &a->z);        /* zi2  = 1/z^2 */
+    secp256k1_fe_mul(&zi3, &a->z, &zi2);  /* zi3  = 1/z^3 */
+    secp256k1_fe_mul(&a->x, &a->x, &zi2); secp256k1_fe_mul(&a->y, &a->y, &zi3);
+    secp256k1_fe_set_int(&a->z, 1);       /* a is now (x/z^2, y/z^3, 1) */
+    r->x = a->x; r->y = a->y;
+}
+
+static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t r[len], const secp256k1_gej_t a[len]) {
+    /* Batch-converts len jacobian points to affine with one field inversion; points at infinity only get their flag copied. */
+    if (len == 0) return; /* a variable-length array of size zero is undefined behaviour */
+    size_t count = 0;
+    secp256k1_fe_t az[len];
+    for (size_t i=0; i<len; i++) {
+        if (!a[i].infinity) {
+            az[count++] = a[i].z;
+        }
+    }
+    secp256k1_fe_t azi[len]; /* sized by len, not count: count is 0 when every input is infinity */
+    secp256k1_fe_inv_all_var(count, azi, az);
+    count = 0;
+    for (size_t i=0; i<len; i++) {
+        r[i].infinity = a[i].infinity;
+        if (!a[i].infinity) {
+            secp256k1_fe_t *zi = &azi[count++];                   /* 1/z of this point */
+            secp256k1_fe_t zi2; secp256k1_fe_sqr(&zi2, zi);       /* 1/z^2 */
+            secp256k1_fe_t zi3; secp256k1_fe_mul(&zi3, &zi2, zi); /* 1/z^3 */
+            secp256k1_fe_mul(&r[i].x, &a[i].x, &zi2);
+            secp256k1_fe_mul(&r[i].y, &a[i].y, &zi3);
+        }
+    }
+}
+
+static void secp256k1_gej_set_infinity(secp256k1_gej_t *r) {
+    /* Infinity is flagged explicitly; all three coordinates are additionally set to 0. */
+    secp256k1_fe_set_int(&r->x, 0); secp256k1_fe_set_int(&r->y, 0);
+    secp256k1_fe_set_int(&r->z, 0);
+    r->infinity = 1;
+}
+
+static void secp256k1_gej_set_xy(secp256k1_gej_t *r, const secp256k1_fe_t *x, const secp256k1_fe_t *y) {
+    /* Lift the affine point (x, y) to jacobian coordinates by choosing z = 1. */
+    r->x = *x; r->y = *y;
+    r->infinity = 0;
+    secp256k1_fe_set_int(&r->z, 1);
+}
+
+static void secp256k1_gej_clear(secp256k1_gej_t *r) {
+    /* Wipe all three coordinates with secp256k1_fe_clear and reset the infinity flag to 0. */
+    secp256k1_fe_clear(&r->x); secp256k1_fe_clear(&r->y);
+    secp256k1_fe_clear(&r->z);
+    r->infinity = 0;
+}
+
+static void secp256k1_ge_clear(secp256k1_ge_t *r) {
+    /* Wipe both coordinates with secp256k1_fe_clear and reset the infinity flag to 0. */
+    secp256k1_fe_clear(&r->x); secp256k1_fe_clear(&r->y);
+    r->infinity = 0;
+}
+
+static int secp256k1_ge_set_xo(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd) {
+    /* Solve y^2 = x^3 + 7 for y. Returns 0 if secp256k1_fe_sqrt reports failure (r is then only partly set), else 1. */
+    secp256k1_fe_t xsq, xcube, rhs;
+    r->x = *x;
+    r->infinity = 0;
+    secp256k1_fe_sqr(&xsq, x); secp256k1_fe_mul(&xcube, x, &xsq);
+    secp256k1_fe_set_int(&rhs, 7); secp256k1_fe_add(&rhs, &xcube);  /* rhs = x^3 + 7 */
+    if (!secp256k1_fe_sqrt(&r->y, &rhs))
+        return 0;
+    secp256k1_fe_normalize(&r->y);
+    if (secp256k1_fe_is_odd(&r->y) != odd) /* parity differs from the requested one: use -y instead */
+        secp256k1_fe_negate(&r->y, &r->y, 1);
+    return 1;
+}
+
+static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a) {
+    /* Affine -> jacobian: copy x, y and the infinity flag, and use z = 1. */
+    r->x = a->x; r->y = a->y;
+    r->infinity = a->infinity;
+    secp256k1_fe_set_int(&r->z, 1);
+}
+
+static void secp256k1_gej_get_x_var(secp256k1_fe_t *r, const secp256k1_gej_t *a) {
+    secp256k1_fe_t zinv; secp256k1_fe_inv_var(&zinv, &a->z);            /* zinv = 1/z (a->infinity is not checked) */
+    secp256k1_fe_sqr(&zinv, &zinv); secp256k1_fe_mul(r, &a->x, &zinv);  /* r = x/z^2 */
+}
+
+static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
+    /* r = -a: only the sign of y changes; x, z and the infinity flag
+     * are carried over unchanged. */
+    *r = *a;
+    /* y is normalized first, then negated with 1 as third argument (presumably its magnitude bound). */
+    secp256k1_fe_normalize(&r->y);
+    secp256k1_fe_negate(&r->y, &r->y, 1);
+}
+
+static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a) {
+    return a->infinity; /* the flag is returned as stored; z is not inspected */
+}
+
+static int secp256k1_gej_is_valid(const secp256k1_gej_t *a) {
+    /* The point at infinity is reported as not valid. */
+    if (a->infinity) return 0;
+    /** Affine curve equation, multiplied through by Z^6:
+     *    y^2       = x^3 + 7
+     *    (Y/Z^3)^2 = (X/Z^2)^3 + 7
+     *    Y^2       = X^3 + 7*Z^6
+     */
+    secp256k1_fe_t lhs, rhs, zz, z6;
+    secp256k1_fe_sqr(&lhs, &a->y);                                      /* lhs = Y^2 */
+    secp256k1_fe_sqr(&rhs, &a->x); secp256k1_fe_mul(&rhs, &rhs, &a->x); /* rhs = X^3 */
+    secp256k1_fe_sqr(&zz, &a->z);                                       /* zz  = Z^2 */
+    secp256k1_fe_sqr(&z6, &zz); secp256k1_fe_mul(&z6, &z6, &zz);        /* z6  = Z^6 */
+    secp256k1_fe_mul_int(&z6, 7);
+    secp256k1_fe_add(&rhs, &z6);                                        /* rhs = X^3 + 7*Z^6 */
+    secp256k1_fe_normalize(&lhs); secp256k1_fe_normalize(&rhs);
+    return secp256k1_fe_equal(&lhs, &rhs);
+}
+
+static int secp256k1_ge_is_valid(const secp256k1_ge_t *a) {
+    if (a->infinity) return 0;  /* the point at infinity is reported as not valid */
+    /* Otherwise check the curve equation y^2 = x^3 + 7 on normalized values. */
+    secp256k1_fe_t lhs, rhs, seven;
+    secp256k1_fe_sqr(&lhs, &a->y);                                      /* lhs = y^2 */
+    secp256k1_fe_sqr(&rhs, &a->x); secp256k1_fe_mul(&rhs, &rhs, &a->x); /* rhs = x^3 */
+    secp256k1_fe_set_int(&seven, 7);
+    secp256k1_fe_add(&rhs, &seven);                                     /* rhs = x^3 + 7 */
+    secp256k1_fe_normalize(&lhs);
+    secp256k1_fe_normalize(&rhs);
+    return secp256k1_fe_equal(&lhs, &rhs);
+}
+
+static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
+    if (a->infinity) {                    /* doubling infinity gives infinity */
+        r->infinity = 1;                  /* only the flag is written; r's coordinates are left as they were */
+        return;
+    }
+
+    secp256k1_fe_t t5 = a->y;             /* T5 = Y, normalized so it can be tested for zero */
+    secp256k1_fe_normalize(&t5);
+    if (secp256k1_fe_is_zero(&t5)) {      /* Y == 0: the result is flagged as infinity */
+        r->infinity = 1;
+        return;
+    }
+
+    secp256k1_fe_t t1,t2,t3,t4;           /* the (n) closing each comment below presumably tracks the value's magnitude */
+    secp256k1_fe_mul(&r->z, &t5, &a->z);
+    secp256k1_fe_mul_int(&r->z, 2);       /* Z' = 2*Y*Z (2) */
+    secp256k1_fe_sqr(&t1, &a->x);
+    secp256k1_fe_mul_int(&t1, 3);         /* T1 = 3*X^2 (3) */
+    secp256k1_fe_sqr(&t2, &t1);           /* T2 = 9*X^4 (1) */
+    secp256k1_fe_sqr(&t3, &t5);
+    secp256k1_fe_mul_int(&t3, 2);         /* T3 = 2*Y^2 (2) */
+    secp256k1_fe_sqr(&t4, &t3);
+    secp256k1_fe_mul_int(&t4, 2);         /* T4 = 8*Y^4 (2) */
+    secp256k1_fe_mul(&t3, &a->x, &t3);    /* T3 = 2*X*Y^2 (1) */
+    r->x = t3;
+    secp256k1_fe_mul_int(&r->x, 4);       /* X' = 8*X*Y^2 (4) */
+    secp256k1_fe_negate(&r->x, &r->x, 4); /* X' = -8*X*Y^2 (5) */
+    secp256k1_fe_add(&r->x, &t2);         /* X' = 9*X^4 - 8*X*Y^2 (6) */
+    secp256k1_fe_negate(&t2, &t2, 1);     /* T2 = -9*X^4 (2) */
+    secp256k1_fe_mul_int(&t3, 6);         /* T3 = 12*X*Y^2 (6) */
+    secp256k1_fe_add(&t3, &t2);           /* T3 = 12*X*Y^2 - 9*X^4 (8) */
+    secp256k1_fe_mul(&r->y, &t1, &t3);    /* Y' = 36*X^3*Y^2 - 27*X^6 (1) */
+    secp256k1_fe_negate(&t2, &t4, 2);     /* T2 = -8*Y^4 (3) */
+    secp256k1_fe_add(&r->y, &t2);         /* Y' = 36*X^3*Y^2 - 27*X^6 - 8*Y^4 (4) */
+    r->infinity = 0;
+}
+
+static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b) {
+    if (a->infinity) { /* infinity + b = b */
+        *r = *b;
+        return;
+    }
+    if (b->infinity) { /* a + infinity = a */
+        *r = *a;
+        return;
+    }
+    r->infinity = 0;
+    secp256k1_fe_t z22; secp256k1_fe_sqr(&z22, &b->z); /* Z2^2 */
+    secp256k1_fe_t z12; secp256k1_fe_sqr(&z12, &a->z); /* Z1^2 */
+    secp256k1_fe_t u1; secp256k1_fe_mul(&u1, &a->x, &z22); /* U1 = X1*Z2^2 */
+    secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12); /* U2 = X2*Z1^2 */
+    secp256k1_fe_t s1; secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z); /* S1 = Y1*Z2^3 */
+    secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); /* S2 = Y2*Z1^3 */
+    secp256k1_fe_normalize(&u1);
+    secp256k1_fe_normalize(&u2);
+    if (secp256k1_fe_equal(&u1, &u2)) { /* U1 == U2: both points have the same affine x */
+        secp256k1_fe_normalize(&s1);
+        secp256k1_fe_normalize(&s2);
+        if (secp256k1_fe_equal(&s1, &s2)) {
+            secp256k1_gej_double_var(r, a); /* same y as well: a == b, so double */
+        } else {
+            r->infinity = 1; /* different y: only the infinity flag is set, r's coordinates are not written */
+        }
+        return;
+    }
+    secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); /* H = U2 - U1 */
+    secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); /* I = S2 - S1 */
+    secp256k1_fe_t i2; secp256k1_fe_sqr(&i2, &i); /* I^2 */
+    secp256k1_fe_t h2; secp256k1_fe_sqr(&h2, &h); /* H^2 */
+    secp256k1_fe_t h3; secp256k1_fe_mul(&h3, &h, &h2); /* H^3 */
+    secp256k1_fe_mul(&r->z, &a->z, &b->z); secp256k1_fe_mul(&r->z, &r->z, &h); /* Z3 = Z1*Z2*H */
+    secp256k1_fe_t t; secp256k1_fe_mul(&t, &u1, &h2); /* T = U1*H^2 */
+    r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2); /* X3 = I^2 - H^3 - 2*T */
+    secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i); /* I*(T - X3) */
+    secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1); /* -S1*H^3 */
+    secp256k1_fe_add(&r->y, &h3); /* Y3 = I*(T - X3) - S1*H^3 */
+}
+
+static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) {
+    /* r = a + b with b affine (implicit Z2 = 1), so U1 = X1 and S1 = Y1 need no multiplication. */
+    if (a->infinity) {                                  /* infinity + b = b, lifted with z = 1 */
+        r->infinity = b->infinity;
+        r->x = b->x;
+        r->y = b->y;
+        secp256k1_fe_set_int(&r->z, 1);
+        return;
+    }
+    if (b->infinity) {                                  /* a + infinity = a */
+        *r = *a;
+        return;
+    }
+    r->infinity = 0;
+    secp256k1_fe_t z12; secp256k1_fe_sqr(&z12, &a->z);
+    /* u1 and s1 are normalized exactly once, here; both stay untouched until they are compared. */
+    secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize(&u1);
+    secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12);
+    secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize(&s1);
+    secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
+    secp256k1_fe_normalize(&u2);
+    if (secp256k1_fe_equal(&u1, &u2)) {                 /* same affine x */
+        secp256k1_fe_normalize(&s2);
+        if (secp256k1_fe_equal(&s1, &s2)) {
+            secp256k1_gej_double_var(r, a);             /* same y as well: a == b, so double */
+        } else {
+            r->infinity = 1;                            /* different y: only the flag is set */
+        }
+        return;
+    }
+    secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
+    secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
+    secp256k1_fe_t i2; secp256k1_fe_sqr(&i2, &i);
+    secp256k1_fe_t h2; secp256k1_fe_sqr(&h2, &h);
+    secp256k1_fe_t h3; secp256k1_fe_mul(&h3, &h, &h2);
+    r->z = a->z; secp256k1_fe_mul(&r->z, &r->z, &h);
+    secp256k1_fe_t t; secp256k1_fe_mul(&t, &u1, &h2);
+    r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2);
+    secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i);
+    secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1);
+    secp256k1_fe_add(&r->y, &h3);
+}
+
+static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) {
+    VERIFY_CHECK(!b->infinity);
+    VERIFY_CHECK(a->infinity == 0 || a->infinity == 1); /* the flag is used as a 0/1 multiplier below */
+
+    /** In:
+     *    Eric Brier and Marc Joye, Weierstrass Elliptic Curves and Side-Channel Attacks.
+     *    In D. Naccache and P. Paillier, Eds., Public Key Cryptography, vol. 2274 of Lecture Notes in Computer Science, pages 335-345. Springer-Verlag, 2002.
+     *  we find as solution for a unified addition/doubling formula:
+     *    lambda = ((x1 + x2)^2 - x1 * x2 + a) / (y1 + y2), with a = 0 for secp256k1's curve equation.
+     *    x3 = lambda^2 - (x1 + x2)
+     *    2*y3 = lambda * (x1 + x2 - 2 * x3) - (y1 + y2).
+     *
+     *  Substituting x_i = Xi / Zi^2 and yi = Yi / Zi^3, for i=1,2,3, gives:
+     *    U1 = X1*Z2^2, U2 = X2*Z1^2
+     *    S1 = Y1*Z2^3, S2 = Y2*Z1^3
+     *    Z = Z1*Z2
+     *    T = U1+U2
+     *    M = S1+S2
+     *    Q = T*M^2
+     *    R = T^2-U1*U2
+     *    X3 = 4*(R^2-Q)
+     *    Y3 = 4*(R*(3*Q-2*R^2)-M^4)
+     *    Z3 = 2*M*Z
+     *  (Note that the paper uses xi = Xi / Zi and yi = Yi / Zi instead.)
+     */
+
+    secp256k1_fe_t zz; secp256k1_fe_sqr(&zz, &a->z);                /* zz = Z1^2 */
+    secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize(&u1);          /* u1 = U1 = X1*Z2^2 (1) */
+    secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &zz);           /* u2 = U2 = X2*Z1^2 (1) */
+    secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize(&s1);          /* s1 = S1 = Y1*Z2^3 (1) */
+    secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &zz);           /* s2 = Y2*Z1^2 (1) */
+    secp256k1_fe_mul(&s2, &s2, &a->z);                              /* s2 = S2 = Y2*Z1^3 (1) */
+    secp256k1_fe_t z = a->z;                                        /* z = Z = Z1*Z2 (8); b is affine, i.e. Z2 = 1 */
+    secp256k1_fe_t t = u1; secp256k1_fe_add(&t, &u2);               /* t = T = U1+U2 (2) */
+    secp256k1_fe_t m = s1; secp256k1_fe_add(&m, &s2);               /* m = M = S1+S2 (2) */
+    secp256k1_fe_t n; secp256k1_fe_sqr(&n, &m);                     /* n = M^2 (1) */
+    secp256k1_fe_t q; secp256k1_fe_mul(&q, &n, &t);                 /* q = Q = T*M^2 (1) */
+    secp256k1_fe_sqr(&n, &n);                                       /* n = M^4 (1) */
+    secp256k1_fe_t rr; secp256k1_fe_sqr(&rr, &t);                   /* rr = T^2 (1) */
+    secp256k1_fe_mul(&t, &u1, &u2); secp256k1_fe_negate(&t, &t, 1); /* t = -U1*U2 (2) */
+    secp256k1_fe_add(&rr, &t);                                      /* rr = R = T^2-U1*U2 (3) */
+    secp256k1_fe_sqr(&t, &rr);                                      /* t = R^2 (1) */
+    secp256k1_fe_mul(&r->z, &m, &z);                                /* r->z = M*Z (1) */
+    secp256k1_fe_normalize(&r->z);
+    int infinity = secp256k1_fe_is_zero(&r->z) * (1 - a->infinity); /* result is infinity iff M*Z == 0 and a is not infinity; no branch */
+    secp256k1_fe_mul_int(&r->z, 2 * (1 - a->infinity)); /* r->z = Z3 = 2*M*Z (2) */
+    r->x = t;                                           /* r->x = R^2 (1) */
+    secp256k1_fe_negate(&q, &q, 1);                     /* q = -Q (2) */
+    secp256k1_fe_add(&r->x, &q);                        /* r->x = R^2-Q (3) */
+    secp256k1_fe_normalize(&r->x);
+    secp256k1_fe_mul_int(&q, 3);                        /* q = -3*Q (6) */
+    secp256k1_fe_mul_int(&t, 2);                        /* t = 2*R^2 (2) */
+    secp256k1_fe_add(&t, &q);                           /* t = 2*R^2-3*Q (8) */
+    secp256k1_fe_mul(&t, &t, &rr);                      /* t = R*(2*R^2-3*Q) (1) */
+    secp256k1_fe_add(&t, &n);                           /* t = R*(2*R^2-3*Q)+M^4 (2) */
+    secp256k1_fe_negate(&r->y, &t, 2);                  /* r->y = R*(3*Q-2*R^2)-M^4 (3) */
+    secp256k1_fe_normalize(&r->y);
+    secp256k1_fe_mul_int(&r->x, 4 * (1 - a->infinity)); /* r->x = X3 = 4*(R^2-Q) */
+    secp256k1_fe_mul_int(&r->y, 4 * (1 - a->infinity)); /* r->y = Y3 = 4*R*(3*Q-2*R^2)-4*M^4 (4) */
+
+    /** In case a->infinity == 1, the above code results in r->x, r->y, and r->z all equal to 0.
+     *  Add b->x to x, b->y to y, and 1 to z in that case.
+     */
+    t = b->x; secp256k1_fe_mul_int(&t, a->infinity);
+    secp256k1_fe_add(&r->x, &t);
+    t = b->y; secp256k1_fe_mul_int(&t, a->infinity);
+    secp256k1_fe_add(&r->y, &t);
+    secp256k1_fe_set_int(&t, a->infinity);
+    secp256k1_fe_add(&r->z, &t);
+    r->infinity = infinity;
+}
+
+
+
+static void secp256k1_gej_get_hex(char *r, int *rlen, const secp256k1_gej_t *a) {
+    secp256k1_gej_t tmp = *a; /* secp256k1_ge_set_gej modifies its input, so a copy is converted */
+    secp256k1_ge_t aff;
+    secp256k1_ge_set_gej(&aff, &tmp); secp256k1_ge_get_hex(r, rlen, &aff);
+}
+
+#ifdef USE_ENDOMORPHISM
+static void secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
+    /* (x, y, z) -> (beta*x, y, z); the infinity flag is copied along with the coordinates. */
+    *r = *a;
+    secp256k1_fe_mul(&r->x, &r->x, &secp256k1_ge_consts->beta);
+}
+
+static void secp256k1_gej_split_exp_var(secp256k1_num_t *r1, secp256k1_num_t *r2, const secp256k1_num_t *a) {
+    const secp256k1_ge_consts_t *c = secp256k1_ge_consts;
+    secp256k1_num_t bnc1, bnc2, bnt1, bnt2, bnn2;
+
+    secp256k1_num_copy(&bnn2, &c->order);
+    secp256k1_num_shift(&bnn2, 1);                 /* bnn2 = order shifted by one bit (presumably order/2, a rounding term) */
+
+    secp256k1_num_mul(&bnc1, a, &c->a1b2);
+    secp256k1_num_add(&bnc1, &bnc1, &bnn2);
+    secp256k1_num_div(&bnc1, &bnc1, &c->order);    /* bnc1 = (a*a1b2 + bnn2) / order */
+
+    secp256k1_num_mul(&bnc2, a, &c->b1);
+    secp256k1_num_add(&bnc2, &bnc2, &bnn2);
+    secp256k1_num_div(&bnc2, &bnc2, &c->order);    /* bnc2 = (a*b1 + bnn2) / order */
+
+    secp256k1_num_mul(&bnt1, &bnc1, &c->a1b2);
+    secp256k1_num_mul(&bnt2, &bnc2, &c->a2);
+    secp256k1_num_add(&bnt1, &bnt1, &bnt2);
+    secp256k1_num_sub(r1, a, &bnt1);               /* r1 = a - (bnc1*a1b2 + bnc2*a2) */
+    secp256k1_num_mul(&bnt1, &bnc1, &c->b1);
+    secp256k1_num_mul(&bnt2, &bnc2, &c->a1b2);
+    secp256k1_num_sub(r2, &bnt1, &bnt2);           /* r2 = bnc1*b1 - bnc2*a1b2 */
+}
+#endif
+
+
+static void secp256k1_ge_start(void) {
+    static const unsigned char secp256k1_ge_consts_order[] = { /* group order, 32 bytes big-endian */
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,
+        0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,
+        0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41
+    };
+    static const unsigned char secp256k1_ge_consts_g_x[] = { /* generator X coordinate, 32 bytes */
+        0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,
+        0x55,0xA0,0x62,0x95,0xCE,0x87,0x0B,0x07,
+        0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,
+        0x59,0xF2,0x81,0x5B,0x16,0xF8,0x17,0x98
+    };
+    static const unsigned char secp256k1_ge_consts_g_y[] = { /* generator Y coordinate, 32 bytes */
+        0x48,0x3A,0xDA,0x77,0x26,0xA3,0xC4,0x65,
+        0x5D,0xA4,0xFB,0xFC,0x0E,0x11,0x08,0xA8,
+        0xFD,0x17,0xB4,0x48,0xA6,0x85,0x54,0x19,
+        0x9C,0x47,0xD0,0x8F,0xFB,0x10,0xD4,0xB8
+    };
+#ifdef USE_ENDOMORPHISM
+    /* properties of secp256k1's efficiently computable endomorphism */
+    static const unsigned char secp256k1_ge_consts_lambda[] = {
+        0x53,0x63,0xad,0x4c,0xc0,0x5c,0x30,0xe0,
+        0xa5,0x26,0x1c,0x02,0x88,0x12,0x64,0x5a,
+        0x12,0x2e,0x22,0xea,0x20,0x81,0x66,0x78,
+        0xdf,0x02,0x96,0x7c,0x1b,0x23,0xbd,0x72
+    };
+    static const unsigned char secp256k1_ge_consts_beta[] = {
+        0x7a,0xe9,0x6a,0x2b,0x65,0x7c,0x07,0x10,
+        0x6e,0x64,0x47,0x9e,0xac,0x34,0x34,0xe9,
+        0x9c,0xf0,0x49,0x75,0x12,0xf5,0x89,0x95,
+        0xc1,0x39,0x6c,0x28,0x71,0x95,0x01,0xee
+    };
+    static const unsigned char secp256k1_ge_consts_a1b2[] = {
+        0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,
+        0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15
+    };
+    static const unsigned char secp256k1_ge_consts_b1[] = {
+        0xe4,0x43,0x7e,0xd6,0x01,0x0e,0x88,0x28,
+        0x6f,0x54,0x7f,0xa9,0x0a,0xbf,0xe4,0xc3
+    };
+    static const unsigned char secp256k1_ge_consts_a2[] = {
+        0x01,
+        0x14,0xca,0x50,0xf7,0xa8,0xe2,0xf3,0xf6,
+        0x57,0xc1,0x10,0x8d,0x9d,0x44,0xcf,0xd8
+    };
+#endif
+    if (secp256k1_ge_consts == NULL) { /* idempotent: later calls find the constants already built */
+        secp256k1_ge_consts_t *ret = malloc(sizeof *ret);
+        if (ret == NULL) { abort(); } /* out of memory: stop instead of writing through NULL */
+        secp256k1_num_set_bin(&ret->order,  secp256k1_ge_consts_order,  sizeof(secp256k1_ge_consts_order));
+        secp256k1_num_copy(&ret->half_order, &ret->order);
+        secp256k1_num_shift(&ret->half_order, 1);
+#ifdef USE_ENDOMORPHISM
+        secp256k1_num_set_bin(&ret->lambda, secp256k1_ge_consts_lambda, sizeof(secp256k1_ge_consts_lambda));
+        secp256k1_num_set_bin(&ret->a1b2,   secp256k1_ge_consts_a1b2,   sizeof(secp256k1_ge_consts_a1b2));
+        secp256k1_num_set_bin(&ret->a2,     secp256k1_ge_consts_a2,     sizeof(secp256k1_ge_consts_a2));
+        secp256k1_num_set_bin(&ret->b1,     secp256k1_ge_consts_b1,     sizeof(secp256k1_ge_consts_b1));
+        secp256k1_fe_set_b32(&ret->beta, secp256k1_ge_consts_beta);
+#endif
+        secp256k1_fe_t g_x; secp256k1_fe_set_b32(&g_x, secp256k1_ge_consts_g_x);
+        secp256k1_fe_t g_y; secp256k1_fe_set_b32(&g_y, secp256k1_ge_consts_g_y);
+        secp256k1_ge_set_xy(&ret->g, &g_x, &g_y);
+        secp256k1_ge_consts = ret; /* published only once every field has been filled in */
+    }
+}
+
+static void secp256k1_ge_stop(void) {
+    /* Frees the constants allocated by secp256k1_ge_start; a no-op when none are allocated. */
+    if (secp256k1_ge_consts == NULL)
+        return;
+    free((void*)secp256k1_ge_consts); /* the cast drops the const qualifier of the global pointer */
+    secp256k1_ge_consts = NULL;
+}
+
+#endif
diff --git a/src/java/org/bitcoin/NativeSecp256k1.java b/src/java/org/bitcoin/NativeSecp256k1.java
new file mode 100644
index 0000000000..90a498eaa2
--- /dev/null
+++ b/src/java/org/bitcoin/NativeSecp256k1.java
@@ -0,0 +1,60 @@
+package org.bitcoin;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import com.google.common.base.Preconditions;
+
+
+/**
+ * This class holds native methods to handle ECDSA verification.
+ * You can find an example library that can be used for this at
+ * https://github.com/sipa/secp256k1
+ */
+public class NativeSecp256k1 {
+    public static final boolean enabled; // true unless loading the native library failed
+    static {
+        boolean isEnabled = true;
+        try {
+            System.loadLibrary("javasecp256k1");
+        } catch (UnsatisfiedLinkError e) {
+            isEnabled = false;
+        }
+        enabled = isEnabled;
+    }
+    // Per-thread direct buffer, allocated on first use in verify() and reused afterwards.
+    private static ThreadLocal<ByteBuffer> nativeECDSABuffer = new ThreadLocal<ByteBuffer>();
+    /**
+     * Verifies the given secp256k1 signature in native code.
+     * Calling when enabled == false is undefined (probably library not loaded)
+     * @param data The data which was signed, must be exactly 32 bytes
+     * @param signature The signature, at most 520 bytes
+     * @param pub The public key which did the signing, at most 520 bytes
+     * @return true if the native verification returned 1, false for any other result
+     */
+    public static boolean verify(byte[] data, byte[] signature, byte[] pub) {
+        Preconditions.checkArgument(data.length == 32 && signature.length <= 520 && pub.length <= 520);
+
+        ByteBuffer byteBuff = nativeECDSABuffer.get();
+        if (byteBuff == null) {
+            byteBuff = ByteBuffer.allocateDirect(32 + 8 + 520 + 520); // data + two ints + max signature + max pubkey
+            byteBuff.order(ByteOrder.nativeOrder()); // the native side reads the two lengths as plain ints
+            nativeECDSABuffer.set(byteBuff);
+        }
+        byteBuff.rewind(); // start writing at position 0 again; the buffer is reused between calls
+        byteBuff.put(data);
+        byteBuff.putInt(signature.length);
+        byteBuff.putInt(pub.length);
+        byteBuff.put(signature);
+        byteBuff.put(pub);
+        return secp256k1_ecdsa_verify(byteBuff) == 1;
+    }
+
+    /**
+     * @param byteBuff signature format is byte[32] data,
+     *        native-endian int signatureLength, native-endian int pubkeyLength,
+     *        byte[signatureLength] signature, byte[pubkeyLength] pub
+     * @return 1 for valid signature, anything else for invalid
+     */
+    private static native int secp256k1_ecdsa_verify(ByteBuffer byteBuff);
+}
diff --git a/src/java/org_bitcoin_NativeSecp256k1.c b/src/java/org_bitcoin_NativeSecp256k1.c
new file mode 100644
index 0000000000..bb4cd70728
--- /dev/null
+++ b/src/java/org_bitcoin_NativeSecp256k1.c
@@ -0,0 +1,23 @@
+#include "org_bitcoin_NativeSecp256k1.h"
+#include "include/secp256k1.h"
+
+JNIEXPORT jint JNICALL Java_org_bitcoin_NativeSecp256k1_secp256k1_1ecdsa_1verify
+  (JNIEnv* env, jclass classObject, jobject byteBufferObject)
+{
+	unsigned char* data = (unsigned char*) (*env)->GetDirectBufferAddress(env, byteBufferObject);
+	if (data == NULL) return 0; /* not a direct buffer, or no JNI access to it: report "not valid" instead of crashing */
+	int sigLen = *((int*)(data + 32));     /* layout: byte[32] message, int sigLen, int pubLen, signature, pubkey */
+	int pubLen = *((int*)(data + 32 + 4)); /* both lengths are trusted here; the Java wrapper bounds them to 520 */
+	return secp256k1_ecdsa_verify(data, 32, data+32+8, sigLen, data+32+8+sigLen, pubLen);
+}
+
+static void __javasecp256k1_attach(void) __attribute__((constructor)); /* declared as a constructor: run at library load */
+static void __javasecp256k1_detach(void) __attribute__((destructor));  /* declared as a destructor: run at library unload */
+
+static void __javasecp256k1_attach(void) {
+	secp256k1_start(SECP256K1_START_VERIFY); /* only the verification part is requested; this binding exposes verify only */
+}
+
+static void __javasecp256k1_detach(void) {
+	secp256k1_stop(); /* counterpart of the secp256k1_start call made in __javasecp256k1_attach */
+}
diff --git a/src/java/org_bitcoin_NativeSecp256k1.h b/src/java/org_bitcoin_NativeSecp256k1.h
new file mode 100644
index 0000000000..d7fb004fa8
--- /dev/null
+++ b/src/java/org_bitcoin_NativeSecp256k1.h
@@ -0,0 +1,21 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_bitcoin_NativeSecp256k1 */
+
+#ifndef _Included_org_bitcoin_NativeSecp256k1
+#define _Included_org_bitcoin_NativeSecp256k1
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_bitcoin_NativeSecp256k1
+ * Method:    secp256k1_ecdsa_verify
+ * Signature: (Ljava/nio/ByteBuffer;)I
+ */
+JNIEXPORT jint JNICALL Java_org_bitcoin_NativeSecp256k1_secp256k1_1ecdsa_1verify
+  (JNIEnv *, jclass, jobject);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/num.h b/src/num.h
new file mode 100644
index 0000000000..c86f847858
--- /dev/null
+++ b/src/num.h
@@ -0,0 +1,100 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_NUM_
+#define _SECP256K1_NUM_
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#if defined(USE_NUM_GMP)
+#include "num_gmp.h"
+#else
+#error "Please select num implementation"
+#endif
+
+/** Clear a number to prevent the leak of sensitive data. */
+static void secp256k1_num_clear(secp256k1_num_t *r);
+
+/** Copy a number. */
+static void secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a);
+
+/** Convert a number's absolute value to a binary big-endian string.
+ *  The rlen-byte output is zero-padded on the left; rlen must be large enough to hold the value. */
+static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a);
+
+/** Set a number to the value of a binary big-endian string. */
+static void secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen);
+
+/** Set a number equal to a (signed) integer. */
+static void secp256k1_num_set_int(secp256k1_num_t *r, int a);
+
+/** Compute a modular inverse. The input must be less than the modulus. */
+static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m);
+
+/** Multiply two numbers modulo another. */
+static void secp256k1_num_mod_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, const secp256k1_num_t *m);
+
+/** Compare the absolute value of two numbers. */
+static int secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b);
+
+/** Test whether two numbers are equal (including sign). */
+static int secp256k1_num_eq(const secp256k1_num_t *a, const secp256k1_num_t *b);
+
+/** Add two (signed) numbers. */
+static void secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
+
+/** Subtract two (signed) numbers. */
+static void secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
+
+/** Multiply two (signed) numbers. */
+static void secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
+
+/** Divide two (signed) numbers. */
+static void secp256k1_num_div(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b);
+
+/** Replace a number by its remainder modulo m. M's sign is ignored. The result is a number between 0 and m-1,
+    even if r was negative. */
+static void secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m);
+
+/** Calculate the number of bits in (the absolute value of) a number. */
+static int secp256k1_num_bits(const secp256k1_num_t *a);
+
+/** Right-shift the passed number by the given number of bits, and return the bits that were shifted out. */
+static int secp256k1_num_shift(secp256k1_num_t *r, int bits);
+
+/** Check whether a number is zero. */
+static int secp256k1_num_is_zero(const secp256k1_num_t *a);
+
+/** Check whether a number is odd. */
+static int secp256k1_num_is_odd(const secp256k1_num_t *a);
+
+/** Check whether a number is strictly negative. */
+static int secp256k1_num_is_neg(const secp256k1_num_t *a);
+
+/** Check whether a particular bit is set in a number. */
+static int secp256k1_num_get_bit(const secp256k1_num_t *a, int pos);
+
+/** Increase a number by 1. */
+static void secp256k1_num_inc(secp256k1_num_t *r);
+
+/** Set a number equal to the value of a hex string (unsigned). */
+static void secp256k1_num_set_hex(secp256k1_num_t *r, const char *a, int alen);
+
+/** Convert (the absolute value of) a number to a hexadecimal string. */
+static void secp256k1_num_get_hex(char *r, int rlen, const secp256k1_num_t *a);
+
+/** Split a number into a low and high part. */
+static void secp256k1_num_split(secp256k1_num_t *rl, secp256k1_num_t *rh, const secp256k1_num_t *a, int bits);
+
+/** Change a number's sign. */
+static void secp256k1_num_negate(secp256k1_num_t *r);
+
+/** Get a bunch of bits from a number. */
+static int secp256k1_num_get_bits(const secp256k1_num_t *a, int offset, int count);
+
+#endif
diff --git a/src/num_gmp.h b/src/num_gmp.h
new file mode 100644
index 0000000000..baa1f2bf2e
--- /dev/null
+++ b/src/num_gmp.h
@@ -0,0 +1,20 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_NUM_REPR_
+#define _SECP256K1_NUM_REPR_
+
+#include <gmp.h>
+
+#define NUM_LIMBS ((256+GMP_NUMB_BITS-1)/GMP_NUMB_BITS) /* limbs needed to hold 256 bits, rounded up */
+/** Sign-magnitude integer stored inline, with room for twice NUM_LIMBS limbs. */
+typedef struct {
+    mp_limb_t data[2*NUM_LIMBS]; /* magnitude, least significant limb first */
+    int neg;                     /* nonzero when the value is negative */
+    int limbs;                   /* number of limbs of data currently in use */
+} secp256k1_num_t;
+
+#endif
diff --git a/src/num_gmp_impl.h b/src/num_gmp_impl.h
new file mode 100644
index 0000000000..e45a59e0cd
--- /dev/null
+++ b/src/num_gmp_impl.h
@@ -0,0 +1,376 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_NUM_REPR_IMPL_H_
+#define _SECP256K1_NUM_REPR_IMPL_H_
+
+#include <string.h>
+#include <stdlib.h>
+#include <gmp.h>
+
+#include "util.h"
+#include "num.h"
+
+#ifdef VERIFY
+static void secp256k1_num_sanity(const secp256k1_num_t *a) { /* Assert that a is normalized: a single limb, or a nonzero top limb. */
+    VERIFY_CHECK(a->limbs == 1 || (a->limbs > 1 && a->data[a->limbs-1] != 0));
+}
+#else
+#define secp256k1_num_sanity(a) do { } while(0) /* expands to nothing when VERIFY is not defined */
+#endif
+
+static void secp256k1_num_init(secp256k1_num_t *r) { /* Make r the normalized value zero: one zero limb, non-negative. */
+    r->data[0] = 0;
+    r->limbs = 1;
+    r->neg = 0;
+}
+
+static void secp256k1_num_clear(secp256k1_num_t *r) { /* Overwrite the whole struct (limbs, sign flag and limb count) with zero bytes. */
+    memset(r, 0, sizeof(*r));
+}
+
+static void secp256k1_num_free(secp256k1_num_t *r) { /* Nothing to release: the function body does no work. */
+    (void)r; /* marks the parameter as intentionally unused */
+}
+
+static void secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a) { /* Copy a into r by plain struct assignment. */
+    *r = *a;
+}
+
+static int secp256k1_num_bits(const secp256k1_num_t *a) {
+    /* Bit length of |a|: the limbs below the top one count in full, plus the significant bits of the top limb. */
+    int top = a->limbs - 1;
+    int count = top * GMP_NUMB_BITS;
+    for (mp_limb_t rest = a->data[top]; rest != 0; rest >>= 1) {
+        count++;
+    }
+    return count;
+}
+
+
+static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a) {
+    /* Write |a| big-endian into r[0..rlen), left-padded with zero bytes. */
+    unsigned char digits[65];
+    int total = 0;
+    /* mpn_get_str is only called for nonzero values; zero is emitted as pure padding. */
+    if (a->limbs>1 || a->data[0] != 0) total = mpn_get_str(digits, 256, (mp_limb_t*)a->data, a->limbs);
+    int skip = 0;
+    for (; skip < total && digits[skip] == 0; skip++) {}
+    int used = total - skip; /* significant bytes once leading zeros are dropped */
+    VERIFY_CHECK(used <= (int)rlen);
+    unsigned int pad = rlen - total + skip;
+    memset(r, 0, pad);
+    if (used > 0) memcpy(r + pad, digits + skip, used);
+    memset(digits, 0, sizeof(digits));
+}
+
+static void secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen) {
+    VERIFY_CHECK(alen > 0); /* Parses alen big-endian bytes (1..64) into a non-negative number. */
+    VERIFY_CHECK(alen <= 64);
+    int written = mpn_set_str(r->data, a, alen, 256);
+    VERIFY_CHECK(written <= NUM_LIMBS*2);
+    r->neg = 0;
+    /* Strip zero limbs from the top until the top limb is nonzero or one limb remains. */
+    for (r->limbs = written; r->limbs > 1 && r->data[r->limbs-1]==0; r->limbs--) {}
+}
+
+static void secp256k1_num_set_int(secp256k1_num_t *r, int a) { /* Set r to a, stored as a sign flag plus a one-limb magnitude. */
+    r->limbs = 1;
+    r->neg = (a < 0);
+    /* Negate in the unsigned limb type: negating the signed int is undefined behavior for INT_MIN. */
+    r->data[0] = (a < 0) ? -(mp_limb_t)a : (mp_limb_t)a;
+}
+
+static void secp256k1_num_add_abs(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { /* r = |a| + |b|; r->neg is left untouched */
+    mp_limb_t c = mpn_add(r->data, a->data, a->limbs, b->data, b->limbs); /* mpn_add requires a->limbs >= b->limbs */
+    r->limbs = a->limbs;
+    if (c != 0) { /* carry out of the top limb: the result grows by one limb */
+        VERIFY_CHECK(r->limbs < 2*NUM_LIMBS);
+        r->data[r->limbs++] = c;
+    }
+}
+
+static void secp256k1_num_sub_abs(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { /* r = |a| - |b|; r->neg is left untouched */
+    mp_limb_t c = mpn_sub(r->data, a->data, a->limbs, b->data, b->limbs); /* c is the borrow out of the top limb */
+    VERIFY_CHECK(c == 0); /* a borrow would mean |a| < |b|, which the caller has to rule out */
+    r->limbs = a->limbs;
+    while (r->limbs > 1 && r->data[r->limbs-1]==0) r->limbs--; /* strip zero limbs from the top */
+}
+
+static void secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m) { /* Reduce r in place modulo |m|. */
+    secp256k1_num_sanity(r);
+    secp256k1_num_sanity(m);
+    /* With fewer limbs than m, |r| is already smaller than |m| and the division is skipped. */
+    if (r->limbs >= m->limbs) {
+        mp_limb_t t[2*NUM_LIMBS]; /* receives the quotient, which is discarded */
+        mpn_tdiv_qr(t, r->data, 0, r->data, r->limbs, m->data, m->limbs); /* the remainder overwrites r->data */
+        memset(t, 0, sizeof(t));
+        r->limbs = m->limbs;
+        while (r->limbs > 1 && r->data[r->limbs-1]==0) r->limbs--;
+    }
+    /* The remainder above is that of |r|; a negative, nonzero r maps to |m| minus that remainder. */
+    if (r->neg && (r->limbs > 1 || r->data[0] != 0)) {
+        secp256k1_num_sub_abs(r, m, r);
+        r->neg = 0;
+    }
+}
+
+static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m) { /* r = 1/a mod m, via GMP's extended GCD */
+    secp256k1_num_sanity(a);
+    secp256k1_num_sanity(m);
+
+    /** mpn_gcdext computes: (G,S) = gcdext(U,V), where
+     *  * G = gcd(U,V)
+     *  * G = U*S + V*T
+     *  * U has equal or more limbs than V, and V has no padding
+     *  If we set U to be (a padded version of) a, and V = m:
+     *    G = a*S + m*T
+     *    G = a*S mod m
+     *  Assuming G=1:
+     *    S = 1/a mod m
+     */
+    VERIFY_CHECK(m->limbs <= NUM_LIMBS);
+    VERIFY_CHECK(m->data[m->limbs-1] != 0);
+    mp_limb_t g[NUM_LIMBS+1];
+    mp_limb_t u[NUM_LIMBS+1];
+    mp_limb_t v[NUM_LIMBS+1];
+    for (int i=0; i < m->limbs; i++) { /* scratch copies: a zero-padded to m's limb count, and m itself */
+        u[i] = (i < a->limbs) ? a->data[i] : 0;
+        v[i] = m->data[i];
+    }
+    mp_size_t sn = NUM_LIMBS+1;
+    mp_size_t gn = mpn_gcdext(g, r->data, &sn, u, m->limbs, v, m->limbs); /* S is written to r->data; its size and sign come back in sn */
+    VERIFY_CHECK(gn == 1); /* this and the next check together assert gcd(a, m) == 1 */
+    VERIFY_CHECK(g[0] == 1);
+    r->neg = a->neg ^ m->neg;
+    if (sn < 0) { /* negative cofactor: store m - |S| instead */
+        mpn_sub(r->data, m->data, m->limbs, r->data, -sn);
+        r->limbs = m->limbs;
+        while (r->limbs > 1 && r->data[r->limbs-1]==0) r->limbs--;
+    } else {
+        r->limbs = sn;
+    }
+    memset(g, 0, sizeof(g)); /* wipe the scratch buffers before returning */
+    memset(u, 0, sizeof(u));
+    memset(v, 0, sizeof(v));
+}
+
+static int secp256k1_num_is_zero(const secp256k1_num_t *a) { /* True for the single-limb value 0; the sign flag is not consulted. */
+    return (a->limbs == 1 && a->data[0] == 0);
+}
+
+static int secp256k1_num_is_odd(const secp256k1_num_t *a) { /* Returns the lowest bit of the magnitude. */
+    return a->data[0] & 1;
+}
+
+static int secp256k1_num_is_neg(const secp256k1_num_t *a) { /* True only when the magnitude is nonzero and the sign flag is set. */
+    return (a->limbs > 1 || a->data[0] != 0) && a->neg;
+}
+
+static int secp256k1_num_cmp(const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    /* Magnitudes only: the operand with more limbs is larger; equal limb counts are decided limb by limb. */
+    if (a->limbs != b->limbs) return (a->limbs > b->limbs) ? 1 : -1;
+    return mpn_cmp(a->data, b->data, a->limbs);
+}
+
+static int secp256k1_num_eq(const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    /* Equal means: same limb count, same effective sign (a zero never counts as negative), same limbs. */
+    int a_negative = a->neg && !secp256k1_num_is_zero(a);
+    int b_negative = b->neg && !secp256k1_num_is_zero(b);
+    return a->limbs == b->limbs && a_negative == b_negative && mpn_cmp(a->data, b->data, a->limbs) == 0;
+}
+
+static void secp256k1_num_subadd(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, int bneg) { /* r = a + b, or r = a - b when bneg is 1 */
+    if (!(b->neg ^ bneg ^ a->neg)) { /* a and the (possibly negated) b have the same sign: add magnitudes */
+        r->neg = a->neg;
+        if (a->limbs >= b->limbs) { /* add_abs needs the operand with more limbs first */
+            secp256k1_num_add_abs(r, a, b);
+        } else {
+            secp256k1_num_add_abs(r, b, a);
+        }
+    } else { /* opposite signs: subtract the smaller magnitude from the larger, whose sign wins */
+        if (secp256k1_num_cmp(a, b) > 0) {
+            r->neg = a->neg;
+            secp256k1_num_sub_abs(r, a, b);
+        } else {
+            r->neg = b->neg ^ bneg;
+            secp256k1_num_sub_abs(r, b, a);
+        }
+    }
+}
+
+static void secp256k1_num_add(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { /* r = a + b (signed) */
+    secp256k1_num_sanity(a);
+    secp256k1_num_sanity(b);
+    secp256k1_num_subadd(r, a, b, 0); /* bneg = 0: b is used with its own sign */
+}
+
+static void secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { /* r = a - b (signed) */
+    secp256k1_num_sanity(a);
+    secp256k1_num_sanity(b);
+    secp256k1_num_subadd(r, a, b, 1); /* bneg = 1: b's sign is flipped before adding */
+}
+
+static void secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { /* r = a * b (signed) */
+    secp256k1_num_sanity(a);
+    secp256k1_num_sanity(b);
+    /* The product is formed in tmp and copied into r afterwards. */
+    mp_limb_t tmp[2*NUM_LIMBS+1];
+    VERIFY_CHECK(a->limbs + b->limbs <= 2*NUM_LIMBS+1);
+    if ((a->limbs==1 && a->data[0]==0) || (b->limbs==1 && b->data[0]==0)) { /* zero operand: the result is a non-negative zero */
+        r->limbs = 1;
+        r->neg = 0;
+        r->data[0] = 0;
+        return;
+    }
+    if (a->limbs >= b->limbs) /* mpn_mul needs the operand with more limbs first */
+        mpn_mul(tmp, a->data, a->limbs, b->data, b->limbs);
+    else
+        mpn_mul(tmp, b->data, b->limbs, a->data, a->limbs);
+    r->limbs = a->limbs + b->limbs;
+    if (r->limbs > 1 && tmp[r->limbs - 1]==0) r->limbs--; /* drop the top limb if the product left it zero */
+    VERIFY_CHECK(r->limbs <= 2*NUM_LIMBS);
+    mpn_copyi(r->data, tmp, r->limbs);
+    r->neg = a->neg ^ b->neg;
+    memset(tmp, 0, sizeof(tmp)); /* wipe the scratch product */
+}
+
+static void secp256k1_num_div(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) { /* r = a / b, truncated; sign is a->neg ^ b->neg */
+    secp256k1_num_sanity(a);
+    secp256k1_num_sanity(b);
+    if (b->limbs > a->limbs) { /* b has more limbs than a, so |a| < |b| and the quotient is zero */
+        r->limbs = 1;
+        r->data[0] = 0;
+        r->neg = 0;
+        return;
+    }
+    mp_limb_t quo[2*NUM_LIMBS+1], rem[2*NUM_LIMBS+1]; /* scratch; only the quotient is kept */
+    mpn_tdiv_qr(quo, rem, 0, a->data, a->limbs, b->data, b->limbs);
+    mpn_copyi(r->data, quo, a->limbs - b->limbs + 1);
+    r->limbs = a->limbs - b->limbs + 1;
+    while (r->limbs > 1 && r->data[r->limbs - 1]==0) r->limbs--;
+    r->neg = a->neg ^ b->neg;
+    memset(quo, 0, sizeof(quo)); /* wipe the scratch buffers, as num_mul and num_mod do with theirs */
+    memset(rem, 0, sizeof(rem));
+}
+
+static void secp256k1_num_mod_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b, const secp256k1_num_t *m) { /* r = (a * b) mod m */
+    secp256k1_num_mul(r, a, b); /* full product first */
+    secp256k1_num_mod(r, m);    /* then reduce it in place */
+}
+
+
+static int secp256k1_num_shift(secp256k1_num_t *r, int bits) { /* r >>= bits; returns the bits that were shifted out */
+    VERIFY_CHECK(bits > 0 && bits < GMP_NUMB_BITS); /* the only shift counts mpn_rshift accepts; 0 would also make the shift below undefined */
+    mp_limb_t ret = mpn_rshift(r->data, r->data, r->limbs, bits);
+    if (r->limbs>1 && r->data[r->limbs-1]==0) r->limbs--; /* shifting by less than a limb frees at most one limb */
+    ret >>= (GMP_NUMB_BITS - bits); /* mpn_rshift hands the shifted-out bits back in the high end of a limb */
+    return ret;
+}
+
+static int secp256k1_num_get_bit(const secp256k1_num_t *a, int pos) { /* Bit pos of the magnitude (bit 0 is the lowest); 0 for positions beyond the limbs in use. */
+    return (a->limbs*GMP_NUMB_BITS > pos) && ((a->data[pos/GMP_NUMB_BITS] >> (pos % GMP_NUMB_BITS)) & 1);
+}
+
+static void secp256k1_num_inc(secp256k1_num_t *r) { /* Add 1 to the magnitude of r; the sign flag is not consulted. */
+    mp_limb_t ret = mpn_add_1(r->data, r->data, r->limbs, (mp_limb_t)1); /* ret is the carry out of the top limb */
+    if (ret) { /* carry: the number grows by one limb */
+        VERIFY_CHECK(r->limbs < 2*NUM_LIMBS);
+        r->data[r->limbs++] = ret;
+    }
+}
+
+static void secp256k1_num_set_hex(secp256k1_num_t *r, const char *a, int alen) { /* Parse alen hex digits (no NUL needed) into a non-negative r. */
+    static const unsigned char cvt[256] = { /* ASCII code -> digit value; every non-hex character maps to 0 */
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 1, 2, 3, 4, 5, 6,7,8,9,0,0,0,0,0,0,
+        0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
+        0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0
+    };
+    VERIFY_CHECK(alen > 0 && alen <= 2*NUM_LIMBS*GMP_NUMB_BITS/4); /* the digits must fit into r->data */
+    unsigned char num[257] = {0}; /* explicit 0: an empty initializer list is not valid C before C23 */
+    /* mpn_set_str expects raw digit values, not ASCII characters. */
+    for (int i=0; i<alen; i++) num[i] = cvt[(unsigned char)a[i]];
+    r->limbs = mpn_set_str(r->data, num, alen, 16);
+    r->neg = 0;
+    while (r->limbs > 1 && r->data[r->limbs-1] == 0) r->limbs--;
+}
+
+static void secp256k1_num_get_hex(char *r, int rlen, const secp256k1_num_t *a) { /* Write |a| as exactly rlen uppercase hex digits, zero-padded on the left; no NUL is appended. */
+    static const unsigned char cvt[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+    unsigned char tmp[257]; /* fixed-size stack buffer: no allocation that could fail */
+    mp_size_t len = (a->limbs>1 || a->data[0] != 0) ? mpn_get_str(tmp, 16, (mp_limb_t*)a->data, a->limbs) : 0; /* mpn_get_str requires a nonzero top limb, so zero is handled as "no digits" */
+    VERIFY_CHECK(len <= rlen);
+    for (int i=0; i<len; i++) { /* digits go right-aligned into r */
+        VERIFY_CHECK(rlen-len+i >= 0);
+        VERIFY_CHECK(rlen-len+i < rlen);
+        VERIFY_CHECK(tmp[i] < 16);
+        r[rlen-len+i] = cvt[tmp[i]];
+    }
+    for (int i=0; i<rlen-len; i++) { /* fill the remaining leading positions with '0' */
+        VERIFY_CHECK(i >= 0);
+        VERIFY_CHECK(i < rlen);
+        r[i] = cvt[0];
+    }
+    memset(tmp, 0, sizeof(tmp)); /* wipe the digits, as num_get_bin does with its buffer */
+}
+
+static void secp256k1_num_split(secp256k1_num_t *rl, secp256k1_num_t *rh, const secp256k1_num_t *a, int bits) { /* rl = low `bits` bits of |a|, rh = |a| >> bits; both receive a's sign flag */
+    VERIFY_CHECK(bits > 0);
+    rh->neg = a->neg;
+    if (bits >= a->limbs * GMP_NUMB_BITS) { /* split point lies above every limb in use: all of a is the low part */
+        *rl = *a;
+        rh->limbs = 1;
+        rh->data[0] = 0;
+        return;
+    }
+    rl->limbs = 0;
+    rl->neg = a->neg;
+    int left = bits;
+    while (left >= GMP_NUMB_BITS) { /* copy the limbs that lie entirely below the split point */
+        rl->data[rl->limbs] = a->data[rl->limbs];
+        rl->limbs++;
+        left -= GMP_NUMB_BITS;
+    }
+    if (left == 0) { /* split on a limb boundary: the high part is a plain copy of the remaining limbs */
+        mpn_copyi(rh->data, a->data + rl->limbs, a->limbs - rl->limbs);
+        rh->limbs = a->limbs - rl->limbs;
+    } else { /* split inside a limb: shift the remaining limbs down by `left` bits */
+        mpn_rshift(rh->data, a->data + rl->limbs, a->limbs - rl->limbs, left);
+        rh->limbs = a->limbs - rl->limbs;
+        while (rh->limbs>1 && rh->data[rh->limbs-1]==0) rh->limbs--;
+    }
+    if (left > 0) { /* the low part also takes the bottom `left` bits of the limb the split falls in */
+        rl->data[rl->limbs] = a->data[rl->limbs] & ((((mp_limb_t)1) << left) - 1);
+        rl->limbs++;
+    }
+    while (rl->limbs>1 && rl->data[rl->limbs-1]==0) rl->limbs--; /* strip zero limbs from the top of the low part */
+}
+
+static void secp256k1_num_negate(secp256k1_num_t *r) { /* Flip the sign flag; the magnitude is not touched. */
+    r->neg ^= 1;
+}
+
+static int secp256k1_num_get_bits(const secp256k1_num_t *a, int offset, int count) { /* Return count bits of |a| starting at bit offset; bit offset becomes bit 0 of the result. */
+    int ret = 0;
+    for (int i = 0; i < count; i++) {
+        ret |= (unsigned int)secp256k1_num_get_bit(a, offset + i) << i; /* get_bit yields 0 beyond the limbs in use, so no stale limb is read */
+    }
+    return ret;
+}
+
+#endif
diff --git a/src/num_impl.h b/src/num_impl.h
new file mode 100644
index 0000000000..f73d3ceea8
--- /dev/null
+++ b/src/num_impl.h
@@ -0,0 +1,22 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_NUM_IMPL_H_
+#define _SECP256K1_NUM_IMPL_H_
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include "num.h"
+
+#if defined(USE_NUM_GMP)
+#include "num_gmp_impl.h"
+#else
+#error "Please select num implementation"
+#endif
+
+#endif
diff --git a/src/scalar.h b/src/scalar.h
new file mode 100644
index 0000000000..3baacb3721
--- /dev/null
+++ b/src/scalar.h
@@ -0,0 +1,63 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_
+#define _SECP256K1_SCALAR_
+
+#include "num.h"
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#if defined(USE_SCALAR_4X64)
+#include "scalar_4x64.h"
+#elif defined(USE_SCALAR_8X32)
+#include "scalar_8x32.h"
+#else
+#error "Please select scalar implementation"
+#endif
+
+/** Clear a scalar to prevent the leak of sensitive data. */
+static void secp256k1_scalar_clear(secp256k1_scalar_t *r);
+
+/** Access bits from a scalar. */
+static int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, int offset, int count);
+
+/** Set a scalar from a big endian byte array. */
+static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *bin, int *overflow);
+
+/** Convert a scalar to a byte array. */
+static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a);
+
+/** Add two scalars together (modulo the group order). */
+static void secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b);
+
+/** Multiply two scalars (modulo the group order). */
+static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b);
+
+/** Compute the square of a scalar (modulo the group order). */
+static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a);
+
+/** Compute the inverse of a scalar (modulo the group order). */
+static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scalar_t *a);
+
+/** Compute the additive inverse (negation) of a scalar (modulo the group order). */
+static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a);
+
+/** Check whether a scalar equals zero. */
+static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a);
+
+/** Check whether a scalar equals one. */
+static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a);
+
+/** Check whether a scalar is higher than the group order divided by 2. */
+static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a);
+
+/** Convert a scalar to a number. */
+static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_t *a);
+
+#endif
diff --git a/src/scalar_4x64.h b/src/scalar_4x64.h
new file mode 100644
index 0000000000..5a751c6862
--- /dev/null
+++ b/src/scalar_4x64.h
@@ -0,0 +1,17 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_REPR_
+#define _SECP256K1_SCALAR_REPR_
+
+#include <stdint.h>
+
+/** A scalar modulo the group order of the secp256k1 curve. */
+typedef struct {
+    uint64_t d[4]; /* four 64-bit limbs */
+} secp256k1_scalar_t;
+
+#endif
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
new file mode 100644
index 0000000000..f78718234f
--- /dev/null
+++ b/src/scalar_4x64_impl.h
@@ -0,0 +1,359 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_REPR_IMPL_H_
+#define _SECP256K1_SCALAR_REPR_IMPL_H_
+
+typedef unsigned __int128 uint128_t;
+
+/* Limbs of the secp256k1 order, least significant limb (N_0) first. */
+#define SECP256K1_N_0 ((uint64_t)0xBFD25E8CD0364141ULL)
+#define SECP256K1_N_1 ((uint64_t)0xBAAEDCE6AF48A03BULL)
+#define SECP256K1_N_2 ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
+#define SECP256K1_N_3 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+
+/* Limbs of 2^256 minus the secp256k1 order; the fourth limb is zero and therefore omitted. */
+#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
+#define SECP256K1_N_C_1 (~SECP256K1_N_1)
+#define SECP256K1_N_C_2 (1)
+
+/* Limbs of half the secp256k1 order (rounded down), least significant limb first. */
+#define SECP256K1_N_H_0 ((uint64_t)0xDFE92F46681B20A0ULL)
+#define SECP256K1_N_H_1 ((uint64_t)0x5D576E7357A4501DULL)
+#define SECP256K1_N_H_2 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#define SECP256K1_N_H_3 ((uint64_t)0x7FFFFFFFFFFFFFFFULL)
+
+SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar_t *r) {
+    /* Zero all four limbs of the scalar. */
+    for (int i = 0; i < 4; i++) {
+        r->d[i] = 0;
+    }
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, int offset, int count) { /* Return count bits of a starting at bit offset. */
+    VERIFY_CHECK((offset + count - 1) / 64 == offset / 64); /* the requested bits must all lie within one limb */
+    return (a->d[offset / 64] >> (offset % 64)) & ((((uint64_t)1) << count) - 1); /* shift the field down, then mask off count bits */
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) { /* Returns 1 when a >= the group order, else 0; written without if-statements. */
+    int yes = 0; /* set once a higher limb has decided a > N */
+    int no = 0;  /* set once a higher limb has decided a < N */
+    no |= (a->d[3] < SECP256K1_N_3); /* No need for a > check. */
+    no |= (a->d[2] < SECP256K1_N_2);
+    yes |= (a->d[2] > SECP256K1_N_2) & ~no;
+    no |= (a->d[1] < SECP256K1_N_1);
+    yes |= (a->d[1] > SECP256K1_N_1) & ~no;
+    yes |= (a->d[0] >= SECP256K1_N_0) & ~no; /* ">=": a value equal to N also counts as overflow */
+    return yes;
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, unsigned int overflow) { /* Add overflow * (2^256 - N) to r modulo 2^256; returns overflow unchanged. */
+    VERIFY_CHECK(overflow <= 1);
+    uint128_t t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0; /* t carries into the next limb after each 64-bit store */
+    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1;
+    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)r->d[2] + overflow * SECP256K1_N_C_2;
+    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint64_t)r->d[3]; /* the top limb only receives the carry: 2^256 - N has no fourth limb */
+    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL;
+    return overflow;
+}
+
+static void secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) { /* r = a + b, followed by one conditional reduction */
+    uint128_t t = (uint128_t)a->d[0] + b->d[0]; /* limb-wise addition, carry kept in the high half of t */
+    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[1] + b->d[1];
+    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[2] + b->d[2];
+    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[3] + b->d[3];
+    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    secp256k1_scalar_reduce(r, t + secp256k1_scalar_check_overflow(r)); /* reduce if the sum carried out of 256 bits or is >= N */
+}
+
+static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
+    /* b32 is big-endian: bytes 0..7 form the top limb d[3], bytes 24..31 the bottom limb d[0]. */
+    for (int limb = 0; limb < 4; limb++) {
+        uint64_t v = 0;
+        for (int k = 0; k < 8; k++) v = (v << 8) | (uint64_t)b32[8 * (3 - limb) + k];
+        r->d[limb] = v;
+    }
+    int over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
+    if (overflow) *overflow = over; /* reporting the reduction is optional for the caller */
+}
+
+static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a) {
+    /* Big-endian output: bin[0] is the top byte of d[3], bin[31] the bottom byte of d[0]. */
+    for (int i = 0; i < 32; i++) {
+        bin[i] = a->d[3 - i / 8] >> (8 * (7 - i % 8));
+    }
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a) { /* Returns 1 when every limb is zero. */
+    return (a->d[0] | a->d[1] | a->d[2] | a->d[3]) == 0;
+}
+
+static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) { /* r = N - a for nonzero a, and 0 for a == 0 */
+    uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0); /* all-ones mask if a != 0, else 0 */
+    uint128_t t = (uint128_t)(~a->d[0]) + SECP256K1_N_0 + 1; /* ~a + 1 is -a in two's complement, so the limb sums give N - a */
+    r->d[0] = t & nonzero; t >>= 64;
+    t += (uint128_t)(~a->d[1]) + SECP256K1_N_1;
+    r->d[1] = t & nonzero; t >>= 64;
+    t += (uint128_t)(~a->d[2]) + SECP256K1_N_2;
+    r->d[2] = t & nonzero; t >>= 64;
+    t += (uint128_t)(~a->d[3]) + SECP256K1_N_3;
+    r->d[3] = t & nonzero; /* the mask forces the result to 0 when a is 0 (otherwise it would be N) */
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a) { /* Returns 1 when d[0] is 1 and all other limbs are zero. */
+    return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3]) == 0;
+}
+
+static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) { /* Returns 1 when a is strictly greater than the SECP256K1_N_H_* constant. */
+    int yes = 0; /* set once a higher limb has decided a > N_H */
+    int no = 0;  /* set once a higher limb has decided a < N_H */
+    no |= (a->d[3] < SECP256K1_N_H_3);
+    yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
+    no |= (a->d[2] < SECP256K1_N_H_2) & ~yes; /* No need for a > check. */
+    no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
+    yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
+    yes |= (a->d[0] > SECP256K1_N_H_0) & ~no; /* strict ">": a value equal to N_H is not high */
+    return yes;
+}
+
+/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
+
+/** Add a*b to the number defined by (c0,c1,c2), c0 being the least significant limb. c2 must never overflow. Uses c0, c1, c2 from the enclosing scope. */
+#define muladd(a,b) { \
+    uint64_t tl, th; \
+    { \
+        uint128_t t = (uint128_t)a * b; \
+        th = t >> 64;         /* at most 0xFFFFFFFFFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFFFFFFFFFF */ \
+    c1 += th;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < th) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
+}
+
+/** Add a*b to the number defined by (c0,c1), c0 being the low limb. c1 must never overflow; c2 is not touched. Uses c0, c1 from the enclosing scope. */
+#define muladd_fast(a,b) { \
+    uint64_t tl, th; \
+    { \
+        uint128_t t = (uint128_t)a * b; \
+        th = t >> 64;         /* at most 0xFFFFFFFFFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFFFFFFFFFF */ \
+    c1 += th;                 /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK(c1 >= th); \
+}
+
+/** Add 2*a*b to the number defined by (c0,c1,c2), c0 being the least significant limb. c2 must never overflow. Uses c0, c1, c2 from the enclosing scope. */
+#define muladd2(a,b) { \
+    uint64_t tl, th; \
+    { \
+        uint128_t t = (uint128_t)a * b; \
+        th = t >> 64;               /* at most 0xFFFFFFFFFFFFFFFE */ \
+        tl = t; \
+    } \
+    uint64_t th2 = th + th;         /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
+    c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
+    uint64_t tl2 = tl + tl;         /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
+    th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFFFFFFFFFF */ \
+    c0 += tl2;                      /* overflow is handled on the next line */ \
+    th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
+    c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
+    c1 += th2;                      /* overflow is handled on the next line */ \
+    c2 += (c1 < th2) ? 1 : 0;       /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
+}
+
+/** Add a to the number defined by (c0,c1,c2), c0 being the least significant limb. c2 must never overflow. The argument a is evaluated twice. */
+#define sumadd(a) { \
+    c0 += (a);                  /* overflow is handled on the next line */ \
+    unsigned int over = (c0 < (a)) ? 1 : 0; \
+    c1 += over;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
+}
+
+/** Add a to the number defined by (c0,c1), c0 being the low limb. c1 must never overflow, c2 must be zero. The argument a is evaluated more than once. */
+#define sumadd_fast(a) { \
+    c0 += (a);                 /* overflow is handled on the next line */ \
+    c1 += (c0 < (a)) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+/** Extract the lowest 64 bits of (c0,c1,c2) into n, and shift the number right by 64 bits (c1 moves into c0, c2 into c1, and c2 becomes 0). */
+#define extract(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = c2; \
+    c2 = 0; \
+}
+
+/** Extract the lowest 64 bits of (c0,c1,c2) into n, and shift the number right by 64 bits. c2 is required to be zero, so it is not touched. */
+#define extract_fast(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = 0; \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint64_t *l) {
+    uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7]; /* n = upper four limbs of the eight-limb input; l[0..3] is the lower half */
+
+    /* 160 bit accumulator (c0 is the least significant limb). */
+    uint64_t c0, c1;
+    uint32_t c2;
+
+    /* Reduce 512 bits into 385 by folding the upper half down via multiplication by SECP256K1_N_C. */
+    /* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
+    c0 = l[0]; c1 = 0; c2 = 0;
+    muladd_fast(n0, SECP256K1_N_C_0);
+    uint64_t m0; extract_fast(m0);
+    sumadd_fast(l[1]);
+    muladd(n1, SECP256K1_N_C_0);
+    muladd(n0, SECP256K1_N_C_1);
+    uint64_t m1; extract(m1);
+    sumadd(l[2]);
+    muladd(n2, SECP256K1_N_C_0);
+    muladd(n1, SECP256K1_N_C_1);
+    sumadd(n0); /* n0 times limb 2 of SECP256K1_N_C, which this code treats as 1 */
+    uint64_t m2; extract(m2);
+    sumadd(l[3]);
+    muladd(n3, SECP256K1_N_C_0);
+    muladd(n2, SECP256K1_N_C_1);
+    sumadd(n1);
+    uint64_t m3; extract(m3);
+    muladd(n3, SECP256K1_N_C_1);
+    sumadd(n2);
+    uint64_t m4; extract(m4);
+    sumadd_fast(n3);
+    uint64_t m5; extract_fast(m5);
+    VERIFY_CHECK(c0 <= 1);
+    uint32_t m6 = c0; /* the single bit above m5 (the 385th bit) */
+
+    /* Reduce 385 bits into 258 with the same folding step applied to m[4..6]. */
+    /* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
+    c0 = m0; c1 = 0; c2 = 0;
+    muladd_fast(m4, SECP256K1_N_C_0);
+    uint64_t p0; extract_fast(p0);
+    sumadd_fast(m1);
+    muladd(m5, SECP256K1_N_C_0);
+    muladd(m4, SECP256K1_N_C_1);
+    uint64_t p1; extract(p1);
+    sumadd(m2);
+    muladd(m6, SECP256K1_N_C_0);
+    muladd(m5, SECP256K1_N_C_1);
+    sumadd(m4);
+    uint64_t p2; extract(p2);
+    sumadd_fast(m3);
+    muladd_fast(m6, SECP256K1_N_C_1);
+    sumadd_fast(m5);
+    uint64_t p3; extract_fast(p3);
+    uint32_t p4 = c0 + m6;
+    VERIFY_CHECK(p4 <= 2);
+
+    /* Reduce 258 bits into 256, this time with a plain 128 bit carry chain since p4 is at most 2. */
+    /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
+    uint128_t c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
+    r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+    c += p1 + (uint128_t)SECP256K1_N_C_1 * p4;
+    r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+    c += p2 + (uint128_t)p4;
+    r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+    c += p3;
+    r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+
+    /* Final reduction of r: reduce once more if the addition carried out of limb 3 or r itself is flagged as overflowing. */
+    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
+}
+
+static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    /* 160 bit accumulator (c0 is the least significant limb). */
+    uint64_t c0 = 0, c1 = 0;
+    uint32_t c2 = 0;
+
+    uint64_t l[8]; /* full 512 bit product, least significant limb first */
+
+    /* l[0..7] = a[0..3] * b[0..3], one output limb at a time: all a[i]*b[j] with equal i+j are summed, then one limb is extracted. */
+    muladd_fast(a->d[0], b->d[0]);
+    extract_fast(l[0]);
+    muladd(a->d[0], b->d[1]);
+    muladd(a->d[1], b->d[0]);
+    extract(l[1]);
+    muladd(a->d[0], b->d[2]);
+    muladd(a->d[1], b->d[1]);
+    muladd(a->d[2], b->d[0]);
+    extract(l[2]);
+    muladd(a->d[0], b->d[3]);
+    muladd(a->d[1], b->d[2]);
+    muladd(a->d[2], b->d[1]);
+    muladd(a->d[3], b->d[0]);
+    extract(l[3]);
+    muladd(a->d[1], b->d[3]);
+    muladd(a->d[2], b->d[2]);
+    muladd(a->d[3], b->d[1]);
+    extract(l[4]);
+    muladd(a->d[2], b->d[3]);
+    muladd(a->d[3], b->d[2]);
+    extract(l[5]);
+    muladd_fast(a->d[3], b->d[3]);
+    extract_fast(l[6]);
+    VERIFY_CHECK(c1 == 0); /* nothing may remain above the top limb; spelled == 0 to match secp256k1_scalar_sqr */
+    l[7] = c0;
+
+    secp256k1_scalar_reduce_512(r, l); /* bring the 512 bit product back down to a scalar */
+}
+
+static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
+    /* 160 bit accumulator (c0 is the least significant limb). */
+    uint64_t c0 = 0, c1 = 0;
+    uint32_t c2 = 0;
+
+    uint64_t l[8]; /* full 512 bit square, least significant limb first */
+
+    /* l[0..7] = a[0..3]^2. Cross terms a[i]*a[j] with i != j occur twice and are added via muladd2; squares a[i]*a[i] are added once. */
+    muladd_fast(a->d[0], a->d[0]);
+    extract_fast(l[0]);
+    muladd2(a->d[0], a->d[1]);
+    extract(l[1]);
+    muladd2(a->d[0], a->d[2]);
+    muladd(a->d[1], a->d[1]);
+    extract(l[2]);
+    muladd2(a->d[0], a->d[3]);
+    muladd2(a->d[1], a->d[2]);
+    extract(l[3]);
+    muladd2(a->d[1], a->d[3]);
+    muladd(a->d[2], a->d[2]);
+    extract(l[4]);
+    muladd2(a->d[2], a->d[3]);
+    extract(l[5]);
+    muladd_fast(a->d[3], a->d[3]);
+    extract_fast(l[6]);
+    VERIFY_CHECK(c1 == 0); /* nothing may remain above the top limb */
+    l[7] = c0;
+
+    secp256k1_scalar_reduce_512(r, l); /* bring the 512 bit square back down to a scalar */
+}
+
+#undef sumadd
+#undef sumadd_fast
+#undef muladd
+#undef muladd_fast
+#undef muladd2
+#undef extract
+#undef extract_fast
+
+#endif
diff --git a/src/scalar_8x32.h b/src/scalar_8x32.h
new file mode 100644
index 0000000000..f70328cfc9
--- /dev/null
+++ b/src/scalar_8x32.h
@@ -0,0 +1,17 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_REPR_
+#define _SECP256K1_SCALAR_REPR_
+
+#include <stdint.h>
+
+/** A scalar modulo the group order of the secp256k1 curve, stored as eight 32-bit limbs. */
+typedef struct {
+    uint32_t d[8]; /* d[0] is the least significant limb, d[7] the most significant */
+} secp256k1_scalar_t;
+
+#endif
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
new file mode 100644
index 0000000000..e58be1365f
--- /dev/null
+++ b/src/scalar_8x32_impl.h
@@ -0,0 +1,572 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_REPR_IMPL_H_
+#define _SECP256K1_SCALAR_REPR_IMPL_H_
+
+/* Limbs of the secp256k1 order, least significant 32-bit limb first (SECP256K1_N_0 holds bits 0..31). */
+#define SECP256K1_N_0 ((uint32_t)0xD0364141UL)
+#define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL)
+#define SECP256K1_N_2 ((uint32_t)0xAF48A03BUL)
+#define SECP256K1_N_3 ((uint32_t)0xBAAEDCE6UL)
+#define SECP256K1_N_4 ((uint32_t)0xFFFFFFFEUL)
+#define SECP256K1_N_5 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_6 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_7 ((uint32_t)0xFFFFFFFFUL)
+
+/* Limbs of 2^256 minus the secp256k1 order, formed as the two's complement (~N + 1) of the order. Limbs 5..7 would be zero and are not defined. */
+#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1) /* the +1 cannot carry into limb 1 because ~SECP256K1_N_0 is not 0xFFFFFFFF */
+#define SECP256K1_N_C_1 (~SECP256K1_N_1)
+#define SECP256K1_N_C_2 (~SECP256K1_N_2)
+#define SECP256K1_N_C_3 (~SECP256K1_N_3)
+#define SECP256K1_N_C_4 (1) /* equals ~SECP256K1_N_4 */
+
+/* Limbs of half the secp256k1 order (rounded down, as the order is odd). */
+#define SECP256K1_N_H_0 ((uint32_t)0x681B20A0UL)
+#define SECP256K1_N_H_1 ((uint32_t)0xDFE92F46UL)
+#define SECP256K1_N_H_2 ((uint32_t)0x57A4501DUL)
+#define SECP256K1_N_H_3 ((uint32_t)0x5D576E73UL)
+#define SECP256K1_N_H_4 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_H_5 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_H_6 ((uint32_t)0xFFFFFFFFUL)
+#define SECP256K1_N_H_7 ((uint32_t)0x7FFFFFFFUL)
+
+SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar_t *r) {
+    r->d[0] = 0; /* set r to zero by writing 0 to each of its eight limbs */
+    r->d[1] = 0;
+    r->d[2] = 0;
+    r->d[3] = 0;
+    r->d[4] = 0;
+    r->d[5] = 0;
+    r->d[6] = 0;
+    r->d[7] = 0;
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, int offset, int count) {
+    VERIFY_CHECK((offset + count - 1) / 32 == offset / 32); /* the requested bits must all lie within a single 32-bit limb */
+    return (a->d[offset / 32] >> (offset % 32)) & ((((uint32_t)1) << count) - 1); /* mask built in uint32_t: 1 << 31 on a signed int is undefined; count must stay below 32 */
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) {
+    int yes = 0; /* becomes 1 once a is known to be >= the order; this is the return value */
+    int no = 0; /* becomes 1 once a is known to be < the order */
+    no |= (a->d[7] < SECP256K1_N_7); /* No need for a > check: this limb of the order is 0xFFFFFFFF. */
+    no |= (a->d[6] < SECP256K1_N_6); /* No need for a > check. */
+    no |= (a->d[5] < SECP256K1_N_5); /* No need for a > check. */
+    no |= (a->d[4] < SECP256K1_N_4);
+    yes |= (a->d[4] > SECP256K1_N_4) & ~no; /* a limb comparison only counts while no higher limb has decided the other way */
+    no |= (a->d[3] < SECP256K1_N_3) & ~yes;
+    yes |= (a->d[3] > SECP256K1_N_3) & ~no;
+    no |= (a->d[2] < SECP256K1_N_2) & ~yes;
+    yes |= (a->d[2] > SECP256K1_N_2) & ~no;
+    no |= (a->d[1] < SECP256K1_N_1) & ~yes;
+    yes |= (a->d[1] > SECP256K1_N_1) & ~no;
+    yes |= (a->d[0] >= SECP256K1_N_0) & ~no; /* >= rather than >: a value equal to the order also counts as overflow */
+    return yes;
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint32_t overflow) {
+    VERIFY_CHECK(overflow <= 1); /* overflow is used as a 0/1 multiplier below */
+    uint64_t t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0; /* r += overflow * (2^256 - order), limb by limb with t carrying between limbs */
+    r->d[0] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1;
+    r->d[1] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[2] + overflow * SECP256K1_N_C_2;
+    r->d[2] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[3] + overflow * SECP256K1_N_C_3;
+    r->d[3] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[4] + overflow * SECP256K1_N_C_4;
+    r->d[4] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[5]; /* limbs 5..7 of 2^256 - order are zero, so only the carry propagates */
+    r->d[5] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[6];
+    r->d[6] = t & 0xFFFFFFFFUL; t >>= 32;
+    t += (uint64_t)r->d[7];
+    r->d[7] = t & 0xFFFFFFFFUL; /* the carry out of the top limb is discarded, which makes the addition a subtraction of the order modulo 2^256 */
+    return overflow; /* handed back unchanged so callers can report whether a reduction happened */
+}
+
+static void secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    uint64_t t = (uint64_t)a->d[0] + b->d[0]; /* r = a + b, limb by limb; the upper half of t carries into the next limb */
+    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[1] + b->d[1];
+    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[2] + b->d[2];
+    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[3] + b->d[3];
+    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[4] + b->d[4];
+    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[5] + b->d[5];
+    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[6] + b->d[6];
+    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[7] + b->d[7];
+    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32; /* t is now the carry out of the 256 bit sum */
+    secp256k1_scalar_reduce(r, t + secp256k1_scalar_check_overflow(r)); /* reduce if the sum carried out of 256 bits or the stored limbs are >= the order */
+}
+
+static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
+    r->d[0] = (uint32_t)b32[31] | (uint32_t)b32[30] << 8 | (uint32_t)b32[29] << 16 | (uint32_t)b32[28] << 24; /* b32 is read as 32 big-endian bytes: the last four bytes form the lowest limb */
+    r->d[1] = (uint32_t)b32[27] | (uint32_t)b32[26] << 8 | (uint32_t)b32[25] << 16 | (uint32_t)b32[24] << 24;
+    r->d[2] = (uint32_t)b32[23] | (uint32_t)b32[22] << 8 | (uint32_t)b32[21] << 16 | (uint32_t)b32[20] << 24;
+    r->d[3] = (uint32_t)b32[19] | (uint32_t)b32[18] << 8 | (uint32_t)b32[17] << 16 | (uint32_t)b32[16] << 24;
+    r->d[4] = (uint32_t)b32[15] | (uint32_t)b32[14] << 8 | (uint32_t)b32[13] << 16 | (uint32_t)b32[12] << 24;
+    r->d[5] = (uint32_t)b32[11] | (uint32_t)b32[10] << 8 | (uint32_t)b32[9] << 16 | (uint32_t)b32[8] << 24;
+    r->d[6] = (uint32_t)b32[7] | (uint32_t)b32[6] << 8 | (uint32_t)b32[5] << 16 | (uint32_t)b32[4] << 24;
+    r->d[7] = (uint32_t)b32[3] | (uint32_t)b32[2] << 8 | (uint32_t)b32[1] << 16 | (uint32_t)b32[0] << 24;
+    int over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r)); /* reduce once if the value is >= the order; over records whether that happened */
+    if (overflow) { /* the overflow output is optional: a NULL pointer is skipped */
+        *overflow = over;
+    }
+}
+
+static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a) {
+    bin[0] = a->d[7] >> 24; bin[1] = a->d[7] >> 16; bin[2] = a->d[7] >> 8; bin[3] = a->d[7]; /* writes 32 big-endian bytes: the most significant limb comes first; each store keeps the low 8 bits */
+    bin[4] = a->d[6] >> 24; bin[5] = a->d[6] >> 16; bin[6] = a->d[6] >> 8; bin[7] = a->d[6];
+    bin[8] = a->d[5] >> 24; bin[9] = a->d[5] >> 16; bin[10] = a->d[5] >> 8; bin[11] = a->d[5];
+    bin[12] = a->d[4] >> 24; bin[13] = a->d[4] >> 16; bin[14] = a->d[4] >> 8; bin[15] = a->d[4];
+    bin[16] = a->d[3] >> 24; bin[17] = a->d[3] >> 16; bin[18] = a->d[3] >> 8; bin[19] = a->d[3];
+    bin[20] = a->d[2] >> 24; bin[21] = a->d[2] >> 16; bin[22] = a->d[2] >> 8; bin[23] = a->d[2];
+    bin[24] = a->d[1] >> 24; bin[25] = a->d[1] >> 16; bin[26] = a->d[1] >> 8; bin[27] = a->d[1];
+    bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0];
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a) {
+    return (a->d[0] | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0; /* 1 iff every limb is zero; OR-ing the limbs avoids a per-limb comparison */
+}
+
+static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
+    uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(a) == 0); /* all-ones mask when a != 0, zero when a == 0, so that the negation of zero stays zero */
+    uint64_t t = (uint64_t)(~a->d[0]) + SECP256K1_N_0 + 1; /* order - a, computed as ~a + order + 1 with t carrying between limbs */
+    r->d[0] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[1]) + SECP256K1_N_1;
+    r->d[1] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[2]) + SECP256K1_N_2;
+    r->d[2] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[3]) + SECP256K1_N_3;
+    r->d[3] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[4]) + SECP256K1_N_4;
+    r->d[4] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[5]) + SECP256K1_N_5;
+    r->d[5] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[6]) + SECP256K1_N_6;
+    r->d[6] = t & nonzero; t >>= 32;
+    t += (uint64_t)(~a->d[7]) + SECP256K1_N_7;
+    r->d[7] = t & nonzero; /* the mask also truncates t to 32 bits; the final carry is dropped */
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a) {
+    return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0; /* 1 iff the lowest limb is exactly 1 (the XOR clears it) and all other limbs are zero */
+}
+
+static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
+    int yes = 0; /* becomes 1 once a is known to be above half the order; this is the return value */
+    int no = 0; /* becomes 1 once a is known to be below half the order */
+    no |= (a->d[7] < SECP256K1_N_H_7);
+    yes |= (a->d[7] > SECP256K1_N_H_7) & ~no;
+    no |= (a->d[6] < SECP256K1_N_H_6) & ~yes; /* No need for a > check: this limb of the half order is 0xFFFFFFFF. */
+    no |= (a->d[5] < SECP256K1_N_H_5) & ~yes; /* No need for a > check. */
+    no |= (a->d[4] < SECP256K1_N_H_4) & ~yes; /* No need for a > check. */
+    no |= (a->d[3] < SECP256K1_N_H_3) & ~yes;
+    yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
+    no |= (a->d[2] < SECP256K1_N_H_2) & ~yes;
+    yes |= (a->d[2] > SECP256K1_N_H_2) & ~no;
+    no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
+    yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
+    yes |= (a->d[0] > SECP256K1_N_H_0) & ~no; /* strictly greater: a value equal to the half order is not high */
+    return yes;
+}
+
+/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
+
+/** Add a*b to the number defined by (c0,c1,c2), c0 being the least significant limb. c2 must never overflow. */
+#define muladd(a,b) { \
+    uint32_t tl, th; \
+    { \
+        uint64_t t = (uint64_t)(a) * (b); /* arguments parenthesized so compound expressions multiply as a whole */ \
+        th = t >> 32;         /* at most 0xFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
+    c1 += th;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < th) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
+}
+
+/** Add a*b to the number defined by (c0,c1), c0 being the least significant limb. c1 must never overflow; c2 is not touched. */
+#define muladd_fast(a,b) { \
+    uint32_t tl, th; \
+    { \
+        uint64_t t = (uint64_t)(a) * (b); /* arguments parenthesized so compound expressions multiply as a whole */ \
+        th = t >> 32;         /* at most 0xFFFFFFFE */ \
+        tl = t; \
+    } \
+    c0 += tl;                 /* overflow is handled on the next line */ \
+    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
+    c1 += th;                 /* never overflows by contract (verified in the next line) */ \
+    VERIFY_CHECK(c1 >= th); \
+}
+
+/** Add 2*a*b to the number defined by (c0,c1,c2), c0 being the least significant limb. c2 must never overflow. */
+#define muladd2(a,b) { \
+    uint32_t tl, th; \
+    { \
+        uint64_t t = (uint64_t)(a) * (b); /* arguments parenthesized so compound expressions multiply as a whole */ \
+        th = t >> 32;               /* at most 0xFFFFFFFE */ \
+        tl = t; \
+    } \
+    uint32_t th2 = th + th;         /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
+    c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
+    uint32_t tl2 = tl + tl;         /* at most 0xFFFFFFFE (in case the lowest 31 bits of tl were 0x7FFFFFFF) */ \
+    th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFF */ \
+    c0 += tl2;                      /* overflow is handled on the next line */ \
+    th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
+    c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
+    c1 += th2;                      /* overflow is handled on the next line */ \
+    c2 += (c1 < th2) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
+}
+
+/** Add a to the number defined by (c0,c1,c2), c0 being the least significant limb. c2 must never overflow. Note that a is evaluated twice. */
+#define sumadd(a) { \
+    c0 += (a);                  /* overflow is handled on the next line */ \
+    unsigned int over = (c0 < (a)) ? 1 : 0; /* carry out of c0: the sum wrapped iff it ended up below a */ \
+    c1 += over;                 /* overflow is handled on the next line */ \
+    c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
+}
+
+/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero; both conditions are asserted with VERIFY_CHECK. */
+#define sumadd_fast(a) { \
+    c0 += (a);                 /* overflow is handled on the next line */ \
+    c1 += (c0 < (a)) ? 1 : 0;  /* never overflows by contract (verified the next line) */ \
+    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); /* if a carry was added, c1 must not have wrapped around to zero */ \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+/** Extract the lowest 32 bits of (c0,c1,c2) into n, and shift the number right by 32 bits (c1 moves into c0, c2 into c1). */
+#define extract(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = c2; \
+    c2 = 0; \
+}
+
+/** Extract the lowest 32 bits of (c0,c1,c2) into n, and shift the number right by 32 bits. c2 is required to be zero, so it is not touched. */
+#define extract_fast(n) { \
+    (n) = c0; \
+    c0 = c1; \
+    c1 = 0; \
+    VERIFY_CHECK(c2 == 0); \
+}
+
+static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l) {
+    uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15]; /* n = upper eight limbs of the sixteen-limb input; l[0..7] is the lower half */
+
+    /* 96 bit accumulator (c0 is the least significant limb). */
+    uint32_t c0, c1, c2;
+
+    /* Reduce 512 bits into 385, using that 2^256 is congruent to SECP256K1_N_C modulo the order to fold the upper half down. */
+    /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */
+    c0 = l[0]; c1 = 0; c2 = 0;
+    muladd_fast(n0, SECP256K1_N_C_0);
+    uint32_t m0; extract_fast(m0);
+    sumadd_fast(l[1]);
+    muladd(n1, SECP256K1_N_C_0);
+    muladd(n0, SECP256K1_N_C_1);
+    uint32_t m1; extract(m1);
+    sumadd(l[2]);
+    muladd(n2, SECP256K1_N_C_0);
+    muladd(n1, SECP256K1_N_C_1);
+    muladd(n0, SECP256K1_N_C_2);
+    uint32_t m2; extract(m2);
+    sumadd(l[3]);
+    muladd(n3, SECP256K1_N_C_0);
+    muladd(n2, SECP256K1_N_C_1);
+    muladd(n1, SECP256K1_N_C_2);
+    muladd(n0, SECP256K1_N_C_3);
+    uint32_t m3; extract(m3);
+    sumadd(l[4]);
+    muladd(n4, SECP256K1_N_C_0);
+    muladd(n3, SECP256K1_N_C_1);
+    muladd(n2, SECP256K1_N_C_2);
+    muladd(n1, SECP256K1_N_C_3);
+    sumadd(n0); /* n0 * SECP256K1_N_C_4; that limb is 1, so a plain addition suffices */
+    uint32_t m4; extract(m4);
+    sumadd(l[5]);
+    muladd(n5, SECP256K1_N_C_0);
+    muladd(n4, SECP256K1_N_C_1);
+    muladd(n3, SECP256K1_N_C_2);
+    muladd(n2, SECP256K1_N_C_3);
+    sumadd(n1);
+    uint32_t m5; extract(m5);
+    sumadd(l[6]);
+    muladd(n6, SECP256K1_N_C_0);
+    muladd(n5, SECP256K1_N_C_1);
+    muladd(n4, SECP256K1_N_C_2);
+    muladd(n3, SECP256K1_N_C_3);
+    sumadd(n2);
+    uint32_t m6; extract(m6);
+    sumadd(l[7]);
+    muladd(n7, SECP256K1_N_C_0);
+    muladd(n6, SECP256K1_N_C_1);
+    muladd(n5, SECP256K1_N_C_2);
+    muladd(n4, SECP256K1_N_C_3);
+    sumadd(n3);
+    uint32_t m7; extract(m7);
+    muladd(n7, SECP256K1_N_C_1);
+    muladd(n6, SECP256K1_N_C_2);
+    muladd(n5, SECP256K1_N_C_3);
+    sumadd(n4);
+    uint32_t m8; extract(m8);
+    muladd(n7, SECP256K1_N_C_2);
+    muladd(n6, SECP256K1_N_C_3);
+    sumadd(n5);
+    uint32_t m9; extract(m9);
+    muladd(n7, SECP256K1_N_C_3);
+    sumadd(n6);
+    uint32_t m10; extract(m10);
+    sumadd_fast(n7);
+    uint32_t m11; extract_fast(m11);
+    VERIFY_CHECK(c0 <= 1);
+    uint32_t m12 = c0; /* the single bit above m11 (the 385th bit) */
+
+    /* Reduce 385 bits into 258 with the same folding step applied to m[8..12]. */
+    /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. */
+    c0 = m0; c1 = 0; c2 = 0;
+    muladd_fast(m8, SECP256K1_N_C_0);
+    uint32_t p0; extract_fast(p0);
+    sumadd_fast(m1);
+    muladd(m9, SECP256K1_N_C_0);
+    muladd(m8, SECP256K1_N_C_1);
+    uint32_t p1; extract(p1);
+    sumadd(m2);
+    muladd(m10, SECP256K1_N_C_0);
+    muladd(m9, SECP256K1_N_C_1);
+    muladd(m8, SECP256K1_N_C_2);
+    uint32_t p2; extract(p2);
+    sumadd(m3);
+    muladd(m11, SECP256K1_N_C_0);
+    muladd(m10, SECP256K1_N_C_1);
+    muladd(m9, SECP256K1_N_C_2);
+    muladd(m8, SECP256K1_N_C_3);
+    uint32_t p3; extract(p3);
+    sumadd(m4);
+    muladd(m12, SECP256K1_N_C_0);
+    muladd(m11, SECP256K1_N_C_1);
+    muladd(m10, SECP256K1_N_C_2);
+    muladd(m9, SECP256K1_N_C_3);
+    sumadd(m8);
+    uint32_t p4; extract(p4);
+    sumadd(m5);
+    muladd(m12, SECP256K1_N_C_1);
+    muladd(m11, SECP256K1_N_C_2);
+    muladd(m10, SECP256K1_N_C_3);
+    sumadd(m9);
+    uint32_t p5; extract(p5);
+    sumadd(m6);
+    muladd(m12, SECP256K1_N_C_2);
+    muladd(m11, SECP256K1_N_C_3);
+    sumadd(m10);
+    uint32_t p6; extract(p6);
+    sumadd_fast(m7);
+    muladd_fast(m12, SECP256K1_N_C_3);
+    sumadd_fast(m11);
+    uint32_t p7; extract_fast(p7);
+    uint32_t p8 = c0 + m12;
+    VERIFY_CHECK(p8 <= 2);
+
+    /* Reduce 258 bits into 256, this time with a plain 64 bit carry chain since p8 is at most 2. */
+    /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */
+    uint64_t c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
+    r->d[0] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p1 + (uint64_t)SECP256K1_N_C_1 * p8;
+    r->d[1] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p2 + (uint64_t)SECP256K1_N_C_2 * p8;
+    r->d[2] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p3 + (uint64_t)SECP256K1_N_C_3 * p8;
+    r->d[3] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p4 + (uint64_t)p8;
+    r->d[4] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p5;
+    r->d[5] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p6;
+    r->d[6] = c & 0xFFFFFFFFUL; c >>= 32;
+    c += p7;
+    r->d[7] = c & 0xFFFFFFFFUL; c >>= 32;
+
+    /* Final reduction of r: reduce once more if the addition carried out of limb 7 or the stored limbs are >= the order. */
+    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
+}
+
+static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    /* 96 bit accumulator (c0 is the least significant limb). */
+    uint32_t c0 = 0, c1 = 0, c2 = 0;
+
+    uint32_t l[16]; /* full 512 bit product, least significant limb first */
+
+    /* l[0..15] = a[0..7] * b[0..7], one output limb at a time: all a[i]*b[j] with equal i+j are summed, then one limb is extracted. */
+    muladd_fast(a->d[0], b->d[0]);
+    extract_fast(l[0]);
+    muladd(a->d[0], b->d[1]);
+    muladd(a->d[1], b->d[0]);
+    extract(l[1]);
+    muladd(a->d[0], b->d[2]);
+    muladd(a->d[1], b->d[1]);
+    muladd(a->d[2], b->d[0]);
+    extract(l[2]);
+    muladd(a->d[0], b->d[3]);
+    muladd(a->d[1], b->d[2]);
+    muladd(a->d[2], b->d[1]);
+    muladd(a->d[3], b->d[0]);
+    extract(l[3]);
+    muladd(a->d[0], b->d[4]);
+    muladd(a->d[1], b->d[3]);
+    muladd(a->d[2], b->d[2]);
+    muladd(a->d[3], b->d[1]);
+    muladd(a->d[4], b->d[0]);
+    extract(l[4]);
+    muladd(a->d[0], b->d[5]);
+    muladd(a->d[1], b->d[4]);
+    muladd(a->d[2], b->d[3]);
+    muladd(a->d[3], b->d[2]);
+    muladd(a->d[4], b->d[1]);
+    muladd(a->d[5], b->d[0]);
+    extract(l[5]);
+    muladd(a->d[0], b->d[6]);
+    muladd(a->d[1], b->d[5]);
+    muladd(a->d[2], b->d[4]);
+    muladd(a->d[3], b->d[3]);
+    muladd(a->d[4], b->d[2]);
+    muladd(a->d[5], b->d[1]);
+    muladd(a->d[6], b->d[0]);
+    extract(l[6]);
+    muladd(a->d[0], b->d[7]);
+    muladd(a->d[1], b->d[6]);
+    muladd(a->d[2], b->d[5]);
+    muladd(a->d[3], b->d[4]);
+    muladd(a->d[4], b->d[3]);
+    muladd(a->d[5], b->d[2]);
+    muladd(a->d[6], b->d[1]);
+    muladd(a->d[7], b->d[0]);
+    extract(l[7]);
+    muladd(a->d[1], b->d[7]);
+    muladd(a->d[2], b->d[6]);
+    muladd(a->d[3], b->d[5]);
+    muladd(a->d[4], b->d[4]);
+    muladd(a->d[5], b->d[3]);
+    muladd(a->d[6], b->d[2]);
+    muladd(a->d[7], b->d[1]);
+    extract(l[8]);
+    muladd(a->d[2], b->d[7]);
+    muladd(a->d[3], b->d[6]);
+    muladd(a->d[4], b->d[5]);
+    muladd(a->d[5], b->d[4]);
+    muladd(a->d[6], b->d[3]);
+    muladd(a->d[7], b->d[2]);
+    extract(l[9]);
+    muladd(a->d[3], b->d[7]);
+    muladd(a->d[4], b->d[6]);
+    muladd(a->d[5], b->d[5]);
+    muladd(a->d[6], b->d[4]);
+    muladd(a->d[7], b->d[3]);
+    extract(l[10]);
+    muladd(a->d[4], b->d[7]);
+    muladd(a->d[5], b->d[6]);
+    muladd(a->d[6], b->d[5]);
+    muladd(a->d[7], b->d[4]);
+    extract(l[11]);
+    muladd(a->d[5], b->d[7]);
+    muladd(a->d[6], b->d[6]);
+    muladd(a->d[7], b->d[5]);
+    extract(l[12]);
+    muladd(a->d[6], b->d[7]);
+    muladd(a->d[7], b->d[6]);
+    extract(l[13]);
+    muladd_fast(a->d[7], b->d[7]);
+    extract_fast(l[14]);
+    VERIFY_CHECK(c1 == 0); /* nothing may remain above the top limb */
+    l[15] = c0;
+
+    secp256k1_scalar_reduce_512(r, l); /* bring the 512 bit product back down to a scalar */
+}
+
+static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
+    /* 96 bit accumulator (c0 is the least significant limb). */
+    uint32_t c0 = 0, c1 = 0, c2 = 0;
+
+    uint32_t l[16]; /* full 512 bit square, least significant limb first */
+
+    /* l[0..15] = a[0..7]^2. Cross terms a[i]*a[j] with i != j occur twice and are added via muladd2; squares a[i]*a[i] are added once. */
+    muladd_fast(a->d[0], a->d[0]);
+    extract_fast(l[0]);
+    muladd2(a->d[0], a->d[1]);
+    extract(l[1]);
+    muladd2(a->d[0], a->d[2]);
+    muladd(a->d[1], a->d[1]);
+    extract(l[2]);
+    muladd2(a->d[0], a->d[3]);
+    muladd2(a->d[1], a->d[2]);
+    extract(l[3]);
+    muladd2(a->d[0], a->d[4]);
+    muladd2(a->d[1], a->d[3]);
+    muladd(a->d[2], a->d[2]);
+    extract(l[4]);
+    muladd2(a->d[0], a->d[5]);
+    muladd2(a->d[1], a->d[4]);
+    muladd2(a->d[2], a->d[3]);
+    extract(l[5]);
+    muladd2(a->d[0], a->d[6]);
+    muladd2(a->d[1], a->d[5]);
+    muladd2(a->d[2], a->d[4]);
+    muladd(a->d[3], a->d[3]);
+    extract(l[6]);
+    muladd2(a->d[0], a->d[7]);
+    muladd2(a->d[1], a->d[6]);
+    muladd2(a->d[2], a->d[5]);
+    muladd2(a->d[3], a->d[4]);
+    extract(l[7]);
+    muladd2(a->d[1], a->d[7]);
+    muladd2(a->d[2], a->d[6]);
+    muladd2(a->d[3], a->d[5]);
+    muladd(a->d[4], a->d[4]);
+    extract(l[8]);
+    muladd2(a->d[2], a->d[7]);
+    muladd2(a->d[3], a->d[6]);
+    muladd2(a->d[4], a->d[5]);
+    extract(l[9]);
+    muladd2(a->d[3], a->d[7]);
+    muladd2(a->d[4], a->d[6]);
+    muladd(a->d[5], a->d[5]);
+    extract(l[10]);
+    muladd2(a->d[4], a->d[7]);
+    muladd2(a->d[5], a->d[6]);
+    extract(l[11]);
+    muladd2(a->d[5], a->d[7]);
+    muladd(a->d[6], a->d[6]);
+    extract(l[12]);
+    muladd2(a->d[6], a->d[7]);
+    extract(l[13]);
+    muladd_fast(a->d[7], a->d[7]);
+    extract_fast(l[14]);
+    VERIFY_CHECK(c1 == 0); /* nothing may remain above the top limb */
+    l[15] = c0;
+
+    secp256k1_scalar_reduce_512(r, l); /* bring the 512 bit square back down to a scalar */
+}
+
+#undef sumadd
+#undef sumadd_fast
+#undef muladd
+#undef muladd_fast
+#undef muladd2
+#undef extract
+#undef extract_fast
+
+#endif
diff --git a/src/scalar_impl.h b/src/scalar_impl.h
new file mode 100644
index 0000000000..ddc5061c76
--- /dev/null
+++ b/src/scalar_impl.h
@@ -0,0 +1,184 @@
+/**********************************************************************
+ * Copyright (c) 2014 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_SCALAR_IMPL_H_
+#define _SECP256K1_SCALAR_IMPL_H_
+
+#include <string.h>
+
+#include "scalar.h"
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#if defined(USE_SCALAR_4X64)
+#include "scalar_4x64_impl.h"
+#elif defined(USE_SCALAR_8X32)
+#include "scalar_8x32_impl.h"
+#else
+#error "Please select scalar implementation"
+#endif
+
+static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_t *a) {
+    unsigned char c[32]; /* scratch buffer for the 32-byte serialization of a */
+    secp256k1_scalar_get_b32(c, a); /* convert by round-tripping through the byte encoding */
+    secp256k1_num_set_bin(r, c, 32);
+}
+
+
+static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) {
+    /* First compute x ^ (2^N - 1) for some values of N: xN holds x raised to an exponent consisting of N consecutive one bits. */
+    secp256k1_scalar_t x2, x3, x4, x6, x7, x8, x15, x30, x60, x120, x127;
+
+    secp256k1_scalar_sqr(&x2,  x);
+    secp256k1_scalar_mul(&x2, &x2,  x);
+
+    secp256k1_scalar_sqr(&x3, &x2);
+    secp256k1_scalar_mul(&x3, &x3,  x);
+
+    secp256k1_scalar_sqr(&x4, &x3);
+    secp256k1_scalar_mul(&x4, &x4,  x);
+
+    secp256k1_scalar_sqr(&x6, &x4);
+    secp256k1_scalar_sqr(&x6, &x6);
+    secp256k1_scalar_mul(&x6, &x6, &x2);
+
+    secp256k1_scalar_sqr(&x7, &x6);
+    secp256k1_scalar_mul(&x7, &x7,  x);
+
+    secp256k1_scalar_sqr(&x8, &x7);
+    secp256k1_scalar_mul(&x8, &x8,  x);
+
+    secp256k1_scalar_sqr(&x15, &x8); /* one squaring here plus six in the loop shifts x8 up by 7 bits before x7 fills them */
+    for (int i=0; i<6; i++)
+        secp256k1_scalar_sqr(&x15, &x15);
+    secp256k1_scalar_mul(&x15, &x15, &x7);
+
+    secp256k1_scalar_sqr(&x30, &x15);
+    for (int i=0; i<14; i++)
+        secp256k1_scalar_sqr(&x30, &x30);
+    secp256k1_scalar_mul(&x30, &x30, &x15);
+
+    secp256k1_scalar_sqr(&x60, &x30);
+    for (int i=0; i<29; i++)
+        secp256k1_scalar_sqr(&x60, &x60);
+    secp256k1_scalar_mul(&x60, &x60, &x30);
+
+    secp256k1_scalar_sqr(&x120, &x60);
+    for (int i=0; i<59; i++)
+        secp256k1_scalar_sqr(&x120, &x120);
+    secp256k1_scalar_mul(&x120, &x120, &x60);
+
+    secp256k1_scalar_sqr(&x127, &x120);
+    for (int i=0; i<6; i++)
+        secp256k1_scalar_sqr(&x127, &x127);
+    secp256k1_scalar_mul(&x127, &x127, &x7);
+
+    /* Then accumulate the final result (t starts at x127 and is updated in place). Each step squares t once per exponent bit consumed (the zero bits noted after the for plus the one bits noted after the multiply), then multiplies in that run of ones. */
+    secp256k1_scalar_t *t = &x127;
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<4; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<4; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (int i=0; i<3; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (int i=0; i<4; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (int i=0; i<5; i++) /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (int i=0; i<4; i++) /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<5; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x4); /* 1111 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<3; i++) /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<4; i++) /* 000 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<10; i++) /* 0000000 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (int i=0; i<4; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x3); /* 111 */
+    for (int i=0; i<9; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x8); /* 11111111 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<3; i++) /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<3; i++) /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<5; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x4); /* 1111 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<5; i++) /* 000 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (int i=0; i<4; i++) /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (int i=0; i<2; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<8; i++) /* 000000 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (int i=0; i<3; i++) /* 0 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, &x2); /* 11 */
+    for (int i=0; i<3; i++) /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<6; i++) /* 00000 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(t, t, x); /* 1 */
+    for (int i=0; i<8; i++) /* 00 */
+        secp256k1_scalar_sqr(t, t);
+    secp256k1_scalar_mul(r, t, &x6); /* 111111 */
+}
+
+#endif
diff --git a/src/secp256k1.c b/src/secp256k1.c
new file mode 100644
index 0000000000..1ab5b3722c
--- /dev/null
+++ b/src/secp256k1.c
@@ -0,0 +1,305 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#define SECP256K1_BUILD (1)
+
+#include "include/secp256k1.h"
+
+#include "util.h"
+#include "num_impl.h"
+#include "field_impl.h"
+#include "scalar_impl.h"
+#include "group_impl.h"
+#include "ecmult_impl.h"
+#include "ecmult_gen_impl.h"
+#include "ecdsa_impl.h"
+#include "eckey_impl.h"
+
+void secp256k1_start(unsigned int flags) {
+    /* Field and group setup always run; signing/verification setup is opt-in via flags. */
+    secp256k1_fe_start();
+    secp256k1_ge_start();
+    if ((flags & SECP256K1_START_SIGN) != 0)
+        secp256k1_ecmult_gen_start();
+
+    if ((flags & SECP256K1_START_VERIFY) != 0)
+        secp256k1_ecmult_start();
+}
+
+void secp256k1_stop(void) { /* calls the *_stop counterparts of secp256k1_start(), in the opposite order */
+    secp256k1_ecmult_stop();     /* unconditional: not gated on SECP256K1_START_VERIFY */
+    secp256k1_ecmult_gen_stop(); /* unconditional: not gated on SECP256K1_START_SIGN */
+    secp256k1_ge_stop();
+    secp256k1_fe_stop();
+}
+
+int secp256k1_ecdsa_verify(const unsigned char *msg, int msglen, const unsigned char *sig, int siglen, const unsigned char *pubkey, int pubkeylen) {
+    DEBUG_CHECK(secp256k1_ecmult_consts != NULL);
+    DEBUG_CHECK(msg != NULL);
+    DEBUG_CHECK(msglen <= 32);
+    DEBUG_CHECK(sig != NULL);
+    DEBUG_CHECK(pubkey != NULL);
+
+    /* Return codes:
+     *    1  the signature verifies
+     *    0  the signature parses but does not verify
+     *   -1  the public key could not be parsed
+     *   -2  the signature could not be parsed
+     * The public key is parsed first, so -1 wins when both are malformed. */
+    secp256k1_num_t msgnum;
+    secp256k1_ecdsa_sig_t parsed_sig;
+    secp256k1_ge_t point;
+    secp256k1_num_set_bin(&msgnum, msg, msglen);
+
+    if (!secp256k1_eckey_pubkey_parse(&point, pubkey, pubkeylen)) {
+        return -1;
+    }
+    if (!secp256k1_ecdsa_sig_parse(&parsed_sig, sig, siglen)) {
+        return -2;
+    }
+    if (!secp256k1_ecdsa_sig_verify(&parsed_sig, &point, &msgnum)) {
+        return 0;
+    }
+    return 1;
+}
+
+int secp256k1_ecdsa_sign(const unsigned char *message, int messagelen, unsigned char *signature, int *signaturelen, const unsigned char *seckey, const unsigned char *nonce) {
+    DEBUG_CHECK(secp256k1_ecmult_gen_consts != NULL);
+    DEBUG_CHECK(message != NULL);
+    DEBUG_CHECK(messagelen <= 32);
+    DEBUG_CHECK(signature != NULL);
+    DEBUG_CHECK(signaturelen != NULL);
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(nonce != NULL);
+
+    secp256k1_scalar_t key_s, nonce_s, msg_s;
+    int nonce_overflow = 0;
+    secp256k1_scalar_set_b32(&key_s, seckey, NULL);
+    secp256k1_scalar_set_b32(&nonce_s, nonce, &nonce_overflow);
+    {
+        /* Right-align the message in a zeroed 32-byte buffer, then wipe the buffer. */
+        unsigned char padded[32] = {0};
+        memcpy(padded + (32 - messagelen), message, messagelen);
+        secp256k1_scalar_set_b32(&msg_s, padded, NULL);
+        memset(padded, 0, 32);
+    }
+    /* A zero or overflowing nonce is refused before any signing is attempted. */
+    secp256k1_ecdsa_sig_t sig;
+    int ret = 0;
+    if (!nonce_overflow && !secp256k1_scalar_is_zero(&nonce_s))
+        ret = secp256k1_ecdsa_sig_sign(&sig, &key_s, &msg_s, &nonce_s, NULL);
+    if (ret)
+        secp256k1_ecdsa_sig_serialize(signature, signaturelen, &sig);
+    secp256k1_scalar_clear(&msg_s);
+    secp256k1_scalar_clear(&nonce_s);
+    secp256k1_scalar_clear(&key_s);
+    return ret;
+}
+
+int secp256k1_ecdsa_sign_compact(const unsigned char *message, int messagelen, unsigned char *sig64, const unsigned char *seckey, const unsigned char *nonce, int *recid) {
+    DEBUG_CHECK(secp256k1_ecmult_gen_consts != NULL);
+    DEBUG_CHECK(message != NULL);
+    DEBUG_CHECK(messagelen <= 32);
+    DEBUG_CHECK(sig64 != NULL);
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(nonce != NULL);
+
+    secp256k1_scalar_t key_s, nonce_s, msg_s;
+    int nonce_overflow = 0;
+    secp256k1_scalar_set_b32(&key_s, seckey, NULL);
+    secp256k1_scalar_set_b32(&nonce_s, nonce, &nonce_overflow);
+    { /* right-align the message in a zeroed 32-byte buffer, wipe it after use */
+        unsigned char padded[32] = {0};
+        memcpy(padded + (32 - messagelen), message, messagelen);
+        secp256k1_scalar_set_b32(&msg_s, padded, NULL);
+        memset(padded, 0, 32);
+    }
+    /* sig64 is only written on success: r in bytes 0..31, s in bytes 32..63. */
+    secp256k1_ecdsa_sig_t sig;
+    int ret = 0;
+    if (!nonce_overflow && !secp256k1_scalar_is_zero(&nonce_s))
+        ret = secp256k1_ecdsa_sig_sign(&sig, &key_s, &msg_s, &nonce_s, recid);
+    if (ret) {
+        secp256k1_num_get_bin(sig64, 32, &sig.r);
+        secp256k1_num_get_bin(sig64 + 32, 32, &sig.s);
+    }
+    secp256k1_scalar_clear(&msg_s);
+    secp256k1_scalar_clear(&nonce_s);
+    secp256k1_scalar_clear(&key_s);
+    return ret;
+}
+
+int secp256k1_ecdsa_recover_compact(const unsigned char *msg, int msglen, const unsigned char *sig64, unsigned char *pubkey, int *pubkeylen, int compressed, int recid) {
+    DEBUG_CHECK(secp256k1_ecmult_consts != NULL);
+    DEBUG_CHECK(msg != NULL);
+    DEBUG_CHECK(msglen <= 32);
+    DEBUG_CHECK(sig64 != NULL);
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(pubkeylen != NULL);
+    DEBUG_CHECK(recid >= 0 && recid <= 3);
+
+    secp256k1_num_t msgnum;
+    secp256k1_ecdsa_sig_t parsed;
+    secp256k1_ge_t point;
+    secp256k1_num_set_bin(&parsed.r, sig64, 32);
+    secp256k1_num_set_bin(&parsed.s, sig64 + 32, 32);
+    secp256k1_num_set_bin(&msgnum, msg, msglen);
+
+    /* sig64 is r followed by s, 32 bytes each; 0 is returned if recovery fails. */
+    if (!secp256k1_ecdsa_sig_recover(&parsed, &point, &msgnum, recid)) {
+        return 0;
+    }
+    return secp256k1_eckey_pubkey_serialize(&point, pubkey, pubkeylen, compressed);
+}
+
+int secp256k1_ec_seckey_verify(const unsigned char *seckey) {
+    DEBUG_CHECK(seckey != NULL);
+
+    secp256k1_scalar_t sec;
+    int overflow = 0; /* initialized like every other call site, so it is never read indeterminate */
+    secp256k1_scalar_set_b32(&sec, seckey, &overflow);
+    int ret = !secp256k1_scalar_is_zero(&sec) && !overflow; /* 1 only for a non-zero key with no overflow reported */
+    secp256k1_scalar_clear(&sec);
+    return ret;
+}
+
+int secp256k1_ec_pubkey_verify(const unsigned char *pubkey, int pubkeylen) {
+    DEBUG_CHECK(pubkey != NULL);
+
+    /* The result is simply whether parsing succeeds; the parsed point is discarded. */
+    secp256k1_ge_t parsed;
+    return secp256k1_eckey_pubkey_parse(&parsed, pubkey, pubkeylen);
+}
+
+int secp256k1_ec_pubkey_create(unsigned char *pubkey, int *pubkeylen, const unsigned char *seckey, int compressed) {
+    DEBUG_CHECK(secp256k1_ecmult_gen_consts != NULL);
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(pubkeylen != NULL);
+    DEBUG_CHECK(seckey != NULL);
+
+    secp256k1_scalar_t key_s;
+    secp256k1_gej_t prod_j;
+    secp256k1_ge_t prod;
+    secp256k1_scalar_set_b32(&key_s, seckey, NULL);
+    secp256k1_ecmult_gen(&prod_j, &key_s);
+    secp256k1_scalar_clear(&key_s); /* the secret scalar is not used past this point */
+    secp256k1_ge_set_gej(&prod, &prod_j);
+    return secp256k1_eckey_pubkey_serialize(&prod, pubkey, pubkeylen, compressed);
+}
+
+int secp256k1_ec_pubkey_decompress(unsigned char *pubkey, int *pubkeylen) {
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(pubkeylen != NULL);
+
+    /* Re-serialize in place with the compressed flag cleared; 0 if the input does not parse. */
+    secp256k1_ge_t point;
+    int ok = secp256k1_eckey_pubkey_parse(&point, pubkey, *pubkeylen);
+    return ok ? secp256k1_eckey_pubkey_serialize(&point, pubkey, pubkeylen, 0) : 0;
+}
+
+int secp256k1_ec_privkey_tweak_add(unsigned char *seckey, const unsigned char *tweak) {
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(tweak != NULL);
+
+    /* seckey is only overwritten when the addition succeeds and the tweak
+     * parsed without overflow; both scalars are cleared on every path. */
+    secp256k1_scalar_t key_s, tweak_s;
+    int tweak_overflow = 0;
+    secp256k1_scalar_set_b32(&tweak_s, tweak, &tweak_overflow);
+    secp256k1_scalar_set_b32(&key_s, seckey, NULL);
+    int added = secp256k1_eckey_privkey_tweak_add(&key_s, &tweak_s);
+    int ret = added && !tweak_overflow;
+    if (ret)
+        secp256k1_scalar_get_b32(seckey, &key_s);
+
+    secp256k1_scalar_clear(&key_s);
+    secp256k1_scalar_clear(&tweak_s);
+    return ret;
+}
+
+int secp256k1_ec_pubkey_tweak_add(unsigned char *pubkey, int pubkeylen, const unsigned char *tweak) {
+    DEBUG_CHECK(secp256k1_ecmult_consts != NULL);
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(tweak != NULL);
+
+    secp256k1_num_t tweak_n;
+    secp256k1_ge_t point;
+    secp256k1_num_set_bin(&tweak_n, tweak, 32);
+    if (!secp256k1_eckey_pubkey_parse(&point, pubkey, pubkeylen))
+        return 0;
+    if (!secp256k1_eckey_pubkey_tweak_add(&point, &tweak_n))
+        return 0;
+
+    /* Write the result back over the input: an input length of at most 33
+     * selects compressed output, and the length is expected not to change. */
+    int oldlen = pubkeylen;
+    int ret = secp256k1_eckey_pubkey_serialize(&point, pubkey, &pubkeylen, oldlen <= 33);
+    VERIFY_CHECK(pubkeylen == oldlen);
+    return ret;
+}
+
+int secp256k1_ec_privkey_tweak_mul(unsigned char *seckey, const unsigned char *tweak) {
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(tweak != NULL);
+
+    /* seckey is only overwritten when the multiply succeeds and the tweak did not overflow. */
+    secp256k1_scalar_t key_s, factor_s;
+    int factor_overflow = 0;
+    secp256k1_scalar_set_b32(&factor_s, tweak, &factor_overflow);
+    secp256k1_scalar_set_b32(&key_s, seckey, NULL);
+    int scaled = secp256k1_eckey_privkey_tweak_mul(&key_s, &factor_s);
+    int ret = scaled && !factor_overflow;
+    if (ret)
+        secp256k1_scalar_get_b32(seckey, &key_s);
+
+    secp256k1_scalar_clear(&key_s);
+    secp256k1_scalar_clear(&factor_s);
+    return ret;
+}
+
+int secp256k1_ec_pubkey_tweak_mul(unsigned char *pubkey, int pubkeylen, const unsigned char *tweak) {
+    DEBUG_CHECK(secp256k1_ecmult_consts != NULL);
+    DEBUG_CHECK(pubkey != NULL);
+    DEBUG_CHECK(tweak != NULL);
+
+    secp256k1_num_t factor_n;
+    secp256k1_ge_t point;
+    secp256k1_num_set_bin(&factor_n, tweak, 32);
+    if (!secp256k1_eckey_pubkey_parse(&point, pubkey, pubkeylen))
+        return 0;
+    if (!secp256k1_eckey_pubkey_tweak_mul(&point, &factor_n))
+        return 0;
+
+    /* Write the result back over the input: an input length of at most 33
+     * selects compressed output, and the length is expected not to change. */
+    int oldlen = pubkeylen;
+    int ret = secp256k1_eckey_pubkey_serialize(&point, pubkey, &pubkeylen, oldlen <= 33);
+    VERIFY_CHECK(pubkeylen == oldlen);
+    return ret;
+}
+
+int secp256k1_ec_privkey_export(const unsigned char *seckey, unsigned char *privkey, int *privkeylen, int compressed) {
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(privkey != NULL);
+    DEBUG_CHECK(privkeylen != NULL);
+
+    secp256k1_scalar_t sec_s; /* scalar copy of the 32-byte key, cleared before returning */
+    secp256k1_scalar_set_b32(&sec_s, seckey, NULL);
+    int written = secp256k1_eckey_privkey_serialize(privkey, privkeylen, &sec_s, compressed);
+    secp256k1_scalar_clear(&sec_s);
+    return written;
+}
+
+int secp256k1_ec_privkey_import(unsigned char *seckey, const unsigned char *privkey, int privkeylen) {
+    DEBUG_CHECK(seckey != NULL);
+    DEBUG_CHECK(privkey != NULL);
+
+    secp256k1_scalar_t parsed;
+    int ok = secp256k1_eckey_privkey_parse(&parsed, privkey, privkeylen);
+    if (ok) /* seckey is left untouched when parsing fails */
+        secp256k1_scalar_get_b32(seckey, &parsed);
+    secp256k1_scalar_clear(&parsed);
+    return ok;
+}
diff --git a/src/testrand.h b/src/testrand.h
new file mode 100644
index 0000000000..018b65cd53
--- /dev/null
+++ b/src/testrand.h
@@ -0,0 +1,26 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_TESTRAND_H_
+#define _SECP256K1_TESTRAND_H_
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+/** Seed the pseudorandom number generator. */
+SECP256K1_INLINE static void secp256k1_rand_seed(uint64_t v);
+
+/** Generate a pseudorandom 32-bit number. */
+static uint32_t secp256k1_rand32(void);
+
+/** Generate a pseudorandom 32-byte array. */
+static void secp256k1_rand256(unsigned char *b32);
+
+/** Generate a pseudorandom 32-byte array with long sequences of zero and one bits. */
+static void secp256k1_rand256_test(unsigned char *b32);
+
+#endif
diff --git a/src/testrand_impl.h b/src/testrand_impl.h
new file mode 100644
index 0000000000..677c4b9a0e
--- /dev/null
+++ b/src/testrand_impl.h
@@ -0,0 +1,60 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_TESTRAND_IMPL_H_
+#define _SECP256K1_TESTRAND_IMPL_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "testrand.h"
+
+static uint32_t secp256k1_Rz = 11, secp256k1_Rw = 11;
+
+SECP256K1_INLINE static void secp256k1_rand_seed(uint64_t v) {
+    /* High half seeds Rz, low half seeds Rw; 0 and one fixed value per half are remapped to 111. */
+    uint32_t hi = (uint32_t)(v >> 32);
+    uint32_t lo = (uint32_t)v;
+    if (hi == 0 || hi == 0x9068ffffU)
+        hi = 111;
+    if (lo == 0 || lo == 0x464fffffU)
+        lo = 111;
+    secp256k1_Rz = hi;
+    secp256k1_Rw = lo;
+}
+
+SECP256K1_INLINE static uint32_t secp256k1_rand32(void) {
+    secp256k1_Rz = 36969 * (secp256k1_Rz & 0xFFFF) + (secp256k1_Rz >> 16); /* low 16 bits times 36969, plus the high 16 bits */
+    secp256k1_Rw = 18000 * (secp256k1_Rw & 0xFFFF) + (secp256k1_Rw >> 16); /* same step with multiplier 18000 */
+    return (secp256k1_Rw << 16) + (secp256k1_Rw >> 16) + secp256k1_Rz; /* Rw with its 16-bit halves swapped, plus Rz */
+}
+
+static void secp256k1_rand256(unsigned char *b32) {
+    /* Eight 32-bit draws, each stored least-significant byte first. */
+    for (int word = 0; word < 8; word++) {
+        uint32_t r = secp256k1_rand32();
+        for (int k = 0; k < 4; k++) {
+            b32[word*4 + k] = (r >> (8*k)) & 0xFF;
+        }
+    }
+}
+
+static void secp256k1_rand256_test(unsigned char *b32) {
+    /* Fill the 256 bits with runs of identical bits. One draw supplies both
+     * the run length (1..64) and the bit value (bit 11 of the draw). */
+    int pos = 0;
+    memset(b32, 0, 32);
+    while (pos < 256) {
+        uint32_t ent = secp256k1_rand32();
+        int run = 1 + ((ent % 64)*((ent >> 6) % 32)+16)/31;
+        uint32_t bit = (ent >> 11) & 1;
+        for (; run > 0 && pos < 256; run--, pos++) {
+            b32[pos / 8] |= bit << (pos % 8);
+        }
+    }
+}
+
+#endif
diff --git a/src/tests.c b/src/tests.c
new file mode 100644
index 0000000000..5d9b8344d9
--- /dev/null
+++ b/src/tests.c
@@ -0,0 +1,1080 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "secp256k1.c"
+#include "testrand_impl.h"
+
+#ifdef ENABLE_OPENSSL_TESTS
+#include "openssl/bn.h"
+#include "openssl/ec.h"
+#include "openssl/ecdsa.h"
+#include "openssl/obj_mac.h"
+#endif
+
+static int count = 64;
+
+/***** NUM TESTS *****/
+
+void random_num_negate(secp256k1_num_t *num) {
+    const int flip = (secp256k1_rand32() & 1) != 0; /* the low bit of one draw decides */
+    if (flip) secp256k1_num_negate(num);
+}
+
+void random_field_element_test(secp256k1_fe_t *fe) {
+    for (;;) { /* redraw until the 32-byte value is below secp256k1_fe_consts->p */
+        unsigned char b32[32];
+        secp256k1_num_t num;
+        secp256k1_rand256_test(b32);
+        secp256k1_num_set_bin(&num, b32, 32);
+        if (secp256k1_num_cmp(&num, &secp256k1_fe_consts->p) < 0) {
+            secp256k1_fe_set_b32(fe, b32);
+            return;
+        }
+    }
+}
+
+void random_field_element_magnitude(secp256k1_fe_t *fe) {
+    secp256k1_fe_normalize(fe);
+    int rounds = secp256k1_rand32() % 4; /* 0..3 double negations */
+    for (int m = 1; m < 1 + 2*rounds; m += 2) { /* third argument runs 1,2 then 3,4 then 5,6 */
+        secp256k1_fe_negate(fe, fe, m);
+        secp256k1_fe_negate(fe, fe, m + 1);
+    }
+}
+
+void random_group_element_test(secp256k1_ge_t *ge) {
+    secp256k1_fe_t x;
+    int found;
+    do { /* retry until secp256k1_ge_set_xo accepts the candidate x */
+        random_field_element_test(&x);
+        found = secp256k1_ge_set_xo(ge, &x, secp256k1_rand32() & 1);
+    } while (!found);
+}
+
+void random_group_element_jacobian_test(secp256k1_gej_t *gej, const secp256k1_ge_t *ge) {
+    /* Draw a random non-zero z, then scale the affine input: x*z^2 and y*z^3. */
+    do {
+        random_field_element_test(&gej->z);
+    } while (secp256k1_fe_is_zero(&gej->z));
+
+    secp256k1_fe_t zz, zzz;
+    secp256k1_fe_sqr(&zz, &gej->z);
+    secp256k1_fe_mul(&zzz, &zz, &gej->z);
+    secp256k1_fe_mul(&gej->x, &ge->x, &zz);
+    secp256k1_fe_mul(&gej->y, &ge->y, &zzz);
+    gej->infinity = ge->infinity;
+}
+
+void random_num_order_test(secp256k1_num_t *num) {
+    /* Redraw (using the long-run bit generator) until the value is non-zero
+     * and below secp256k1_ge_consts->order. */
+    int usable;
+    do {
+        unsigned char b32[32];
+        secp256k1_rand256_test(b32);
+        secp256k1_num_set_bin(num, b32, 32);
+        usable = !secp256k1_num_is_zero(num) &&
+                 secp256k1_num_cmp(num, &secp256k1_ge_consts->order) < 0;
+    } while (!usable);
+}
+
+void random_scalar_order_test(secp256k1_scalar_t *num) {
+    /* Redraw until secp256k1_scalar_set_b32 reports no overflow and the
+     * resulting scalar is non-zero. */
+    int overflow;
+    do {
+        unsigned char b32[32];
+        overflow = 0;
+        secp256k1_rand256_test(b32);
+        secp256k1_scalar_set_b32(num, b32, &overflow);
+    } while (overflow || secp256k1_scalar_is_zero(num));
+}
+
+void random_num_order(secp256k1_num_t *num) {
+    /* Redraw (using the plain 32-byte generator) until the value is non-zero
+     * and below secp256k1_ge_consts->order. */
+    int usable;
+    do {
+        unsigned char b32[32];
+        secp256k1_rand256(b32);
+        secp256k1_num_set_bin(num, b32, 32);
+        usable = !secp256k1_num_is_zero(num) &&
+                 secp256k1_num_cmp(num, &secp256k1_ge_consts->order) < 0;
+    } while (!usable);
+}
+
+void test_num_copy_inc_cmp(void) {
+    secp256k1_num_t n1,n2;
+    random_num_order(&n1);
+    secp256k1_num_copy(&n2, &n1);
+    CHECK(secp256k1_num_eq(&n1, &n2)); /* a fresh copy must compare equal ... */
+    CHECK(secp256k1_num_eq(&n2, &n1)); /* ... in both argument orders */
+    secp256k1_num_inc(&n2);
+    CHECK(!secp256k1_num_eq(&n1, &n2)); /* after secp256k1_num_inc the copy must differ */
+    CHECK(!secp256k1_num_eq(&n2, &n1));
+}
+
+
+void test_num_get_set_hex(void) {
+    secp256k1_num_t n1,n2;
+    random_num_order_test(&n1);
+    char c[64]; /* 64 hex digits; the buffer has no room for a terminator */
+    secp256k1_num_get_hex(c, 64, &n1);
+    secp256k1_num_set_hex(&n2, c, 64);
+    CHECK(secp256k1_num_eq(&n1, &n2)); /* hex round trip must reproduce the number */
+    for (int i=0; i<64; i++) {
+        /* check whether the lower 4 bits correspond to the last hex character */
+        int low1 = secp256k1_num_shift(&n1, 4); /* presumably drops the low 4 bits of n1 and returns them */
+        int lowh = c[63];
+        int low2 = ((lowh>>6)*9+(lowh-'0'))&15; /* ASCII hex digit to value: letters have bit 6 set, which adds 9 */
+        CHECK(low1 == low2);
+        /* shift bits off the hex representation, and compare */
+        memmove(c+1, c, 63);
+        c[0] = '0';
+        secp256k1_num_set_hex(&n2, c, 64);
+        CHECK(secp256k1_num_eq(&n1, &n2));
+    }
+}
+
+void test_num_get_set_bin(void) {
+    secp256k1_num_t n1,n2;
+    random_num_order_test(&n1);
+    unsigned char c[32];
+    secp256k1_num_get_bin(c, 32, &n1);
+    secp256k1_num_set_bin(&n2, c, 32);
+    CHECK(secp256k1_num_eq(&n1, &n2)); /* binary round trip must reproduce the number */
+    for (int i=0; i<32; i++) {
+        /* check whether the lower 8 bits correspond to the last byte */
+        int low1 = secp256k1_num_shift(&n1, 8); /* presumably drops the low 8 bits of n1 and returns them */
+        int low2 = c[31];
+        CHECK(low1 == low2);
+        /* shift bits off the byte representation, and compare */
+        memmove(c+1, c, 31);
+        c[0] = 0;
+        secp256k1_num_set_bin(&n2, c, 32);
+        CHECK(secp256k1_num_eq(&n1, &n2));
+    }
+}
+
+void run_num_int(void) {
+    secp256k1_num_t n1;
+    for (int i=-255; i<256; i++) {
+        unsigned char c1[3] = {0}; /* expected bytes; {0} because an empty initializer is not valid before C23 */
+        c1[2] = abs(i); /* get_bin is expected to yield |i| in the last byte, whatever the sign */
+        unsigned char c2[3] = {0x11,0x22,0x33}; /* filler that get_bin must overwrite */
+        secp256k1_num_set_int(&n1, i);
+        secp256k1_num_get_bin(c2, 3, &n1);
+        CHECK(memcmp(c1, c2, 3) == 0);
+    }
+}
+
+void test_num_negate(void) {
+    secp256k1_num_t n1;
+    secp256k1_num_t n2;
+    random_num_order_test(&n1); /* n1 = R */
+    random_num_negate(&n1); /* R may now carry either sign */
+    secp256k1_num_copy(&n2, &n1); /* n2 = R */
+    secp256k1_num_sub(&n1, &n2, &n1); /* n1 = n2-n1 = 0 */
+    CHECK(secp256k1_num_is_zero(&n1));
+    secp256k1_num_copy(&n1, &n2); /* n1 = R */
+    secp256k1_num_negate(&n1); /* n1 = -R */
+    CHECK(!secp256k1_num_is_zero(&n1)); /* R was drawn non-zero, so -R is non-zero too */
+    secp256k1_num_add(&n1, &n2, &n1); /* n1 = n2+n1 = 0 */
+    CHECK(secp256k1_num_is_zero(&n1));
+    secp256k1_num_copy(&n1, &n2); /* n1 = R */
+    secp256k1_num_negate(&n1); /* n1 = -R */
+    CHECK(secp256k1_num_is_neg(&n1) != secp256k1_num_is_neg(&n2)); /* negation must flip the sign */
+    secp256k1_num_negate(&n1); /* n1 = R */
+    CHECK(secp256k1_num_eq(&n1, &n2)); /* negating twice restores the value */
+}
+
+void test_num_add_sub(void) {
+    int r = secp256k1_rand32(); /* bits 0 and 1 gate the sign randomization of n1 and n2 */
+    secp256k1_num_t n1;
+    secp256k1_num_t n2;
+    random_num_order_test(&n1); /* n1 = R1 */
+    if (r & 1) {
+        random_num_negate(&n1);
+    }
+    random_num_order_test(&n2); /* n2 = R2 */
+    if (r & 2) {
+        random_num_negate(&n2);
+    }
+    secp256k1_num_t n1p2, n2p1, n1m2, n2m1;
+    secp256k1_num_add(&n1p2, &n1, &n2); /* n1p2 = R1 + R2 */
+    secp256k1_num_add(&n2p1, &n2, &n1); /* n2p1 = R2 + R1 */
+    secp256k1_num_sub(&n1m2, &n1, &n2); /* n1m2 = R1 - R2 */
+    secp256k1_num_sub(&n2m1, &n2, &n1); /* n2m1 = R2 - R1 */
+    CHECK(secp256k1_num_eq(&n1p2, &n2p1)); /* addition commutes */
+    CHECK(!secp256k1_num_eq(&n1p2, &n1m2)); /* R1+R2 != R1-R2 because R2 was drawn non-zero */
+    secp256k1_num_negate(&n2m1); /* n2m1 = -R2 + R1 */
+    CHECK(secp256k1_num_eq(&n2m1, &n1m2));
+    CHECK(!secp256k1_num_eq(&n2m1, &n1)); /* R1-R2 != R1 because R2 was drawn non-zero */
+    secp256k1_num_add(&n2m1, &n2m1, &n2); /* n2m1 = -R2 + R1 + R2 = R1 */
+    CHECK(secp256k1_num_eq(&n2m1, &n1));
+    CHECK(!secp256k1_num_eq(&n2p1, &n1)); /* R2+R1 != R1 because R2 was drawn non-zero */
+    secp256k1_num_sub(&n2p1, &n2p1, &n2); /* n2p1 = R2 + R1 - R2 = R1 */
+    CHECK(secp256k1_num_eq(&n2p1, &n1));
+}
+
+void run_num_smalltests(void) {
+    for (int left = 100*count; left > 0; left--) { /* randomized checks, 100*count rounds */
+        test_num_copy_inc_cmp();
+        test_num_get_set_hex();
+        test_num_get_set_bin();
+        test_num_negate();
+        test_num_add_sub();
+    }
+    run_num_int(); /* draws no random input, so a single pass suffices */
+}
+
+/***** SCALAR TESTS *****/
+
+int secp256k1_scalar_eq(const secp256k1_scalar_t *s1, const secp256k1_scalar_t *s2) {
+    /* s1 equals s2 exactly when s1 + (-s2) is zero. */
+    secp256k1_scalar_t diff;
+    secp256k1_scalar_negate(&diff, s2);
+    secp256k1_scalar_add(&diff, &diff, s1);
+    return secp256k1_scalar_is_zero(&diff);
+}
+
+void scalar_test(void) {
+    unsigned char c[32];
+
+    /* Set 's' to a random scalar, with value 'snum'. */
+    secp256k1_rand256_test(c);
+    secp256k1_scalar_t s;
+    secp256k1_scalar_set_b32(&s, c, NULL);
+    secp256k1_num_t snum;
+    secp256k1_num_set_bin(&snum, c, 32);
+    secp256k1_num_mod(&snum, &secp256k1_ge_consts->order);
+
+    /* Set 's1' to a random scalar, with value 's1num'. */
+    secp256k1_rand256_test(c);
+    secp256k1_scalar_t s1;
+    secp256k1_scalar_set_b32(&s1, c, NULL);
+    secp256k1_num_t s1num;
+    secp256k1_num_set_bin(&s1num, c, 32);
+    secp256k1_num_mod(&s1num, &secp256k1_ge_consts->order);
+
+    /* Set 's2' to a random scalar, with value 's2num', and byte array representation 'c'. */
+    secp256k1_rand256_test(c);
+    secp256k1_scalar_t s2;
+    int overflow = 0;
+    secp256k1_scalar_set_b32(&s2, c, &overflow);
+    secp256k1_num_t s2num;
+    secp256k1_num_set_bin(&s2num, c, 32);
+    secp256k1_num_mod(&s2num, &secp256k1_ge_consts->order);
+
+    {
+        /* Test that fetching groups of 4 bits from a scalar and recursing n(i)=16*n(i-1)+p(i) reconstructs it. */
+        secp256k1_num_t n, t, m;
+        secp256k1_num_set_int(&n, 0);
+        secp256k1_num_set_int(&m, 16);
+        for (int i = 0; i < 256; i += 4) {
+            secp256k1_num_set_int(&t, secp256k1_scalar_get_bits(&s, 256 - 4 - i, 4));
+            secp256k1_num_mul(&n, &n, &m);
+            secp256k1_num_add(&n, &n, &t);
+        }
+        CHECK(secp256k1_num_eq(&n, &snum));
+    }
+
+    {
+        /* Test that get_b32 returns the same as get_bin on the number. */
+        unsigned char r1[32];
+        secp256k1_scalar_get_b32(r1, &s2);
+        unsigned char r2[32];
+        secp256k1_num_get_bin(r2, 32, &s2num);
+        CHECK(memcmp(r1, r2, 32) == 0);
+        /* If no overflow occurred when assigning, it should also be equal to the original byte array. */
+        CHECK((memcmp(r1, c, 32) == 0) == (overflow == 0));
+    }
+
+    {
+        /* Test that adding the scalars together is equal to adding their numbers together modulo the order. */
+        secp256k1_num_t rnum;
+        secp256k1_num_add(&rnum, &snum, &s2num);
+        secp256k1_num_mod(&rnum, &secp256k1_ge_consts->order);
+        secp256k1_scalar_t r;
+        secp256k1_scalar_add(&r, &s, &s2);
+        secp256k1_num_t r2num;
+        secp256k1_scalar_get_num(&r2num, &r);
+        CHECK(secp256k1_num_eq(&rnum, &r2num));
+    }
+
+    {
+        /* Test that multiplying the scalars is equal to multiplying their numbers modulo the order. */
+        secp256k1_num_t rnum;
+        secp256k1_num_mul(&rnum, &snum, &s2num);
+        secp256k1_num_mod(&rnum, &secp256k1_ge_consts->order);
+        secp256k1_scalar_t r;
+        secp256k1_scalar_mul(&r, &s, &s2);
+        secp256k1_num_t r2num;
+        secp256k1_scalar_get_num(&r2num, &r);
+        CHECK(secp256k1_num_eq(&rnum, &r2num));
+        /* The result can only be zero if at least one of the factors was zero. */
+        CHECK(secp256k1_scalar_is_zero(&r) == (secp256k1_scalar_is_zero(&s) || secp256k1_scalar_is_zero(&s2)));
+        /* The results can only be equal to one of the factors if that factor was zero, or the other factor was one. */
+        CHECK(secp256k1_num_eq(&rnum, &snum) == (secp256k1_scalar_is_zero(&s) || secp256k1_scalar_is_one(&s2)));
+        CHECK(secp256k1_num_eq(&rnum, &s2num) == (secp256k1_scalar_is_zero(&s2) || secp256k1_scalar_is_one(&s)));
+    }
+
+    {
+        /* Check that comparison with zero matches comparison with zero on the number. */
+        CHECK(secp256k1_num_is_zero(&snum) == secp256k1_scalar_is_zero(&s));
+        /* Check that comparison with the half order is equal to testing for high scalar. */
+        CHECK(secp256k1_scalar_is_high(&s) == (secp256k1_num_cmp(&snum, &secp256k1_ge_consts->half_order) > 0));
+        secp256k1_scalar_t neg;
+        secp256k1_scalar_negate(&neg, &s);
+        secp256k1_num_t negnum;
+        secp256k1_num_sub(&negnum, &secp256k1_ge_consts->order, &snum);
+        secp256k1_num_mod(&negnum, &secp256k1_ge_consts->order);
+        /* Check that comparison with the half order is equal to testing for high scalar after negation. */
+        CHECK(secp256k1_scalar_is_high(&neg) == (secp256k1_num_cmp(&negnum, &secp256k1_ge_consts->half_order) > 0));
+        /* Negating should change the high property, unless the value was already zero. */
+        CHECK((secp256k1_scalar_is_high(&s) == secp256k1_scalar_is_high(&neg)) == secp256k1_scalar_is_zero(&s));
+        secp256k1_num_t negnum2;
+        secp256k1_scalar_get_num(&negnum2, &neg);
+        /* Negating a scalar should be equal to (order - n) mod order on the number. */
+        CHECK(secp256k1_num_eq(&negnum, &negnum2));
+        secp256k1_scalar_add(&neg, &neg, &s);
+        /* Adding a number to its negation should result in zero. */
+        CHECK(secp256k1_scalar_is_zero(&neg));
+        secp256k1_scalar_negate(&neg, &neg);
+        /* Negating zero should still result in zero. */
+        CHECK(secp256k1_scalar_is_zero(&neg));
+    }
+
+    {
+        /* Test that scalar inverses are equal to the inverse of their number modulo the order. */
+        if (!secp256k1_scalar_is_zero(&s)) {
+            secp256k1_scalar_t inv;
+            secp256k1_scalar_inverse(&inv, &s);
+            secp256k1_num_t invnum;
+            secp256k1_num_mod_inverse(&invnum, &snum, &secp256k1_ge_consts->order);
+            secp256k1_num_t invnum2;
+            secp256k1_scalar_get_num(&invnum2, &inv);
+            CHECK(secp256k1_num_eq(&invnum, &invnum2));
+            secp256k1_scalar_mul(&inv, &inv, &s);
+            /* Multiplying a scalar with its inverse must result in one. */
+            CHECK(secp256k1_scalar_is_one(&inv));
+            secp256k1_scalar_inverse(&inv, &inv);
+            /* Inverting one must result in one. */
+            CHECK(secp256k1_scalar_is_one(&inv));
+        }
+    }
+
+    {
+        /* Test commutativity of add. */
+        secp256k1_scalar_t r1, r2;
+        secp256k1_scalar_add(&r1, &s1, &s2);
+        secp256k1_scalar_add(&r2, &s2, &s1);
+        CHECK(secp256k1_scalar_eq(&r1, &r2));
+    }
+
+    {
+        /* Test commutativity of mul. */
+        secp256k1_scalar_t r1, r2;
+        secp256k1_scalar_mul(&r1, &s1, &s2);
+        secp256k1_scalar_mul(&r2, &s2, &s1);
+        CHECK(secp256k1_scalar_eq(&r1, &r2));
+    }
+
+    {
+        /* Test associativity of add. */
+        secp256k1_scalar_t r1, r2;
+        secp256k1_scalar_add(&r1, &s1, &s2);
+        secp256k1_scalar_add(&r1, &r1, &s);
+        secp256k1_scalar_add(&r2, &s2, &s);
+        secp256k1_scalar_add(&r2, &s1, &r2);
+        CHECK(secp256k1_scalar_eq(&r1, &r2));
+    }
+
+    {
+        /* Test associativity of mul. */
+        secp256k1_scalar_t r1, r2;
+        secp256k1_scalar_mul(&r1, &s1, &s2);
+        secp256k1_scalar_mul(&r1, &r1, &s);
+        secp256k1_scalar_mul(&r2, &s2, &s);
+        secp256k1_scalar_mul(&r2, &s1, &r2);
+        CHECK(secp256k1_scalar_eq(&r1, &r2));
+    }
+
+    {
+        /* Test distributivity of mul over add. */
+        secp256k1_scalar_t r1, r2, t;
+        secp256k1_scalar_add(&r1, &s1, &s2);
+        secp256k1_scalar_mul(&r1, &r1, &s);
+        secp256k1_scalar_mul(&r2, &s1, &s);
+        secp256k1_scalar_mul(&t, &s2, &s);
+        secp256k1_scalar_add(&r2, &r2, &t);
+        CHECK(secp256k1_scalar_eq(&r1, &r2));
+    }
+
+    {
+        /* Test square. */
+        secp256k1_scalar_t r1, r2;
+        secp256k1_scalar_sqr(&r1, &s1);
+        secp256k1_scalar_mul(&r2, &s1, &s1);
+        CHECK(secp256k1_scalar_eq(&r1, &r2));
+    }
+}
+
+void run_scalar_tests(void) {
+    int remaining = 128 * count; /* number of scalar_test() rounds */
+    while (remaining-- > 0)
+        scalar_test();
+}
+
+/***** FIELD TESTS *****/
+
+void random_fe(secp256k1_fe_t *x) {
+    unsigned char raw[32]; /* 32 bytes from secp256k1_rand256, passed on without a range check here */
+    secp256k1_rand256(raw);
+    secp256k1_fe_set_b32(x, raw);
+}
+
+void random_fe_non_zero(secp256k1_fe_t *nz) {
+    int tries;
+    for (tries = 9; tries >= 0; tries--) {
+        random_fe(nz);
+        secp256k1_fe_normalize(nz);
+        if (!secp256k1_fe_is_zero(nz))
+            break;
+    }
+    /* Ten zero draws in a row leave tries at -1; that is vanishingly unlikely. */
+    CHECK(tries >= 0);
+}
+
+void random_fe_non_square(secp256k1_fe_t *ns) {
+    secp256k1_fe_t root; /* scratch output of the sqrt probe; never read */
+    random_fe_non_zero(ns);
+    /* If the draw turns out to have a square root, hand back its negation instead. */
+    if (secp256k1_fe_sqrt(&root, ns))
+        secp256k1_fe_negate(ns, ns, 1);
+}
+
+int check_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+    secp256k1_fe_t lhs = *a, rhs = *b; /* work on copies so the inputs stay untouched */
+    secp256k1_fe_normalize(&lhs); secp256k1_fe_normalize(&rhs);
+    return secp256k1_fe_equal(&lhs, &rhs);
+}
+
+int check_fe_inverse(const secp256k1_fe_t *a, const secp256k1_fe_t *ai) {
+    secp256k1_fe_t prod, unit; /* passes when a*ai compares equal to 1 */
+    secp256k1_fe_mul(&prod, a, ai);
+    secp256k1_fe_set_int(&unit, 1);
+    return check_fe_equal(&prod, &unit);
+}
+
+void run_field_inv(void) {
+    secp256k1_fe_t x, xi, xii; /* a value, its inverse, and the inverse of that */
+    for (int i=0; i<10*count; i++) {
+        random_fe_non_zero(&x);
+        secp256k1_fe_inv(&xi, &x);
+        CHECK(check_fe_inverse(&x, &xi)); /* x * xi must be 1 */
+        secp256k1_fe_inv(&xii, &xi);
+        CHECK(check_fe_equal(&x, &xii)); /* inverting twice must return x */
+    }
+}
+
+void run_field_inv_var(void) {
+    secp256k1_fe_t x, xi, xii; /* a value, its inverse, and the inverse of that */
+    for (int i=0; i<10*count; i++) {
+        random_fe_non_zero(&x);
+        secp256k1_fe_inv_var(&xi, &x);
+        CHECK(check_fe_inverse(&x, &xi)); /* x * xi must be 1 */
+        secp256k1_fe_inv_var(&xii, &xi);
+        CHECK(check_fe_equal(&x, &xii)); /* inverting twice must return x */
+    }
+}
+
+void run_field_inv_all(void) {
+    secp256k1_fe_t x[16], xi[16], xii[16];
+    /* Check it's safe to call for 0 elements */
+    secp256k1_fe_inv_all(0, xi, x);
+    for (int i=0; i<count; i++) {
+        size_t len = (secp256k1_rand32() & 15) + 1; /* 1..16, so it always fits the 16-element arrays */
+        for (size_t j=0; j<len; j++)
+            random_fe_non_zero(&x[j]);
+        secp256k1_fe_inv_all(len, xi, x);
+        for (size_t j=0; j<len; j++)
+            CHECK(check_fe_inverse(&x[j], &xi[j])); /* each xi[j] must invert x[j] */
+        secp256k1_fe_inv_all(len, xii, xi);
+        for (size_t j=0; j<len; j++)
+            CHECK(check_fe_equal(&x[j], &xii[j])); /* inverting twice must return x[j] */
+    }
+}
+
+void run_field_inv_all_var(void) {
+    secp256k1_fe_t x[16], xi[16], xii[16];
+    /* Check it's safe to call for 0 elements */
+    secp256k1_fe_inv_all_var(0, xi, x);
+    for (int i=0; i<count; i++) {
+        size_t len = (secp256k1_rand32() & 15) + 1; /* 1..16, so it always fits the 16-element arrays */
+        for (size_t j=0; j<len; j++)
+            random_fe_non_zero(&x[j]);
+        secp256k1_fe_inv_all_var(len, xi, x);
+        for (size_t j=0; j<len; j++)
+            CHECK(check_fe_inverse(&x[j], &xi[j])); /* each xi[j] must invert x[j] */
+        secp256k1_fe_inv_all_var(len, xii, xi);
+        for (size_t j=0; j<len; j++)
+            CHECK(check_fe_equal(&x[j], &xii[j])); /* inverting twice must return x[j] */
+    }
+}
+
+void run_sqr(void) {
+    secp256k1_fe_t x, s;
+
+    /* Start from the negation of 1 and keep doubling, squaring every value.
+     * Nothing is asserted here; the squares are computed and then dropped. */
+    secp256k1_fe_set_int(&x, 1);
+    secp256k1_fe_negate(&x, &x, 1);
+
+    for (int step = 0; step < 512; step++) {
+        secp256k1_fe_mul_int(&x, 2);
+        secp256k1_fe_normalize(&x);
+        secp256k1_fe_sqr(&s, &x);
+    }
+}
+
+/* Check secp256k1_fe_sqrt on a: with k == NULL it must return 0; otherwise it must return
+ * non-zero and produce a root equal to +k or -k. */
+void test_sqrt(const secp256k1_fe_t *a, const secp256k1_fe_t *k) {
+    secp256k1_fe_t root, neg_root;
+    int found = secp256k1_fe_sqrt(&root, a);
+    CHECK((found == 0) == (k == NULL));
+    if (k == NULL) return;
+    /* root + k == 0 means root == -k; (-root) + k == 0 means root == k. */
+    secp256k1_fe_negate(&neg_root, &root, 1);
+    secp256k1_fe_add(&root, k); secp256k1_fe_add(&neg_root, k);
+    secp256k1_fe_normalize(&root); secp256k1_fe_normalize(&neg_root);
+    CHECK(secp256k1_fe_is_zero(&root) || secp256k1_fe_is_zero(&neg_root));
+}
+
+/* Square-root tests: zero, the squares of 1..100 and squares of random elements, together
+ * with values for which test_sqrt is told (k == NULL) that no square root may be found. */
+void run_sqrt(void) {
+    secp256k1_fe_t non_sq, val, sq, other;
+    int small = 1, outer = 0;
+    /* sqrt(0) must be 0 */
+    secp256k1_fe_set_int(&val, 0);
+    secp256k1_fe_sqr(&sq, &val);
+    test_sqrt(&sq, &val);
+
+    /* Small squares have a root; their negations must not. */
+    while (small <= 100) {
+        secp256k1_fe_set_int(&val, small++);
+        secp256k1_fe_sqr(&sq, &val);
+        test_sqrt(&sq, &val);
+        secp256k1_fe_negate(&other, &sq, 1);
+        test_sqrt(&other, NULL);
+    }
+
+    /* Large random values: x^2 has a root; -x^2 and x^2 * (non-square) must not. */
+    for (; outer < 10; outer++) {
+        random_fe_non_square(&non_sq);
+        for (int inner = 0; inner < count; inner++) {
+            random_fe(&val);
+            secp256k1_fe_sqr(&sq, &val);
+            test_sqrt(&sq, &val);
+            secp256k1_fe_negate(&other, &sq, 1); test_sqrt(&other, NULL);
+            secp256k1_fe_mul(&other, &sq, &non_sq); test_sqrt(&other, NULL);
+        }
+    }
+}
+
+/***** GROUP TESTS *****/
+
+int ge_equals_ge(const secp256k1_ge_t *a, const secp256k1_ge_t *b) { /* returns 1 when a and b are the same affine point, else 0 */
+    /* The coordinates of an infinite point carry no meaning, so infinity only equals infinity. */
+    if (a->infinity || b->infinity) return a->infinity && b->infinity;
+    return check_fe_equal(&a->x, &b->x) && check_fe_equal(&a->y, &b->y);
+}
+
+/* CHECK that Jacobian point b, converted to affine coordinates, equals affine point a. */
+void ge_equals_gej(const secp256k1_ge_t *a, const secp256k1_gej_t *b) {
+    secp256k1_gej_t scratch = *b; /* the conversion is given a private copy, never *b itself */
+    secp256k1_ge_t b_affine; secp256k1_ge_set_gej_var(&b_affine, &scratch);
+    CHECK(ge_equals_ge(a, &b_affine));
+}
+
+/* CHECK that two Jacobian points are equal once both are converted to affine coordinates. */
+void gej_equals_gej(const secp256k1_gej_t *a, const secp256k1_gej_t *b) {
+    secp256k1_gej_t lhs_copy = *a, rhs_copy = *b; /* the conversions are given private copies */
+    secp256k1_ge_t lhs; secp256k1_ge_set_gej_var(&lhs, &lhs_copy);
+    secp256k1_ge_t rhs; secp256k1_ge_set_gej_var(&rhs, &rhs_copy);
+    CHECK(ge_equals_ge(&lhs, &rhs));
+}
+
+/* Group addition consistency on random points a, b, n = -a and infinity i: the results of
+ * secp256k1_gej_add_var, secp256k1_gej_add_ge_var and secp256k1_gej_add_ge must agree with
+ * each other, a + n must be infinity, and adding infinity to a must give a. */
+void test_ge(void) {
+    secp256k1_ge_t a, b, i, n;
+    random_group_element_test(&a);
+    random_group_element_test(&b);
+    n = a;
+    secp256k1_fe_normalize(&a.y);
+    secp256k1_fe_negate(&n.y, &a.y, 1);
+    secp256k1_ge_set_infinity(&i);
+    random_field_element_magnitude(&a.x); random_field_element_magnitude(&a.y);
+    random_field_element_magnitude(&b.x); random_field_element_magnitude(&b.y);
+    random_field_element_magnitude(&n.x); random_field_element_magnitude(&n.y);
+
+    secp256k1_gej_t aj, bj, ij, nj;
+    random_group_element_jacobian_test(&aj, &a);
+    random_group_element_jacobian_test(&bj, &b);
+    secp256k1_gej_set_infinity(&ij);
+    random_group_element_jacobian_test(&nj, &n);
+    random_field_element_magnitude(&aj.x);
+    random_field_element_magnitude(&aj.y);
+    random_field_element_magnitude(&aj.z);
+    random_field_element_magnitude(&bj.x);
+    random_field_element_magnitude(&bj.y);
+    random_field_element_magnitude(&bj.z);
+    random_field_element_magnitude(&nj.x);
+    random_field_element_magnitude(&nj.y);
+    random_field_element_magnitude(&nj.z);
+
+    /* gej + gej adds */
+    secp256k1_gej_t aaj; secp256k1_gej_add_var(&aaj, &aj, &aj);
+    secp256k1_gej_t abj; secp256k1_gej_add_var(&abj, &aj, &bj);
+    secp256k1_gej_t aij; secp256k1_gej_add_var(&aij, &aj, &ij);
+    secp256k1_gej_t anj; secp256k1_gej_add_var(&anj, &aj, &nj);
+    secp256k1_gej_t iaj; secp256k1_gej_add_var(&iaj, &ij, &aj);
+    secp256k1_gej_t iij; secp256k1_gej_add_var(&iij, &ij, &ij);
+
+    /* gej + ge adds */
+    secp256k1_gej_t aa; secp256k1_gej_add_ge_var(&aa, &aj, &a);
+    secp256k1_gej_t ab; secp256k1_gej_add_ge_var(&ab, &aj, &b);
+    secp256k1_gej_t ai; secp256k1_gej_add_ge_var(&ai, &aj, &i);
+    secp256k1_gej_t an; secp256k1_gej_add_ge_var(&an, &aj, &n);
+    secp256k1_gej_t ia; secp256k1_gej_add_ge_var(&ia, &ij, &a);
+    secp256k1_gej_t ii; secp256k1_gej_add_ge_var(&ii, &ij, &i);
+
+    /* const gej + ge adds */
+    secp256k1_gej_t aac; secp256k1_gej_add_ge(&aac, &aj, &a);
+    secp256k1_gej_t abc; secp256k1_gej_add_ge(&abc, &aj, &b);
+    secp256k1_gej_t anc; secp256k1_gej_add_ge(&anc, &aj, &n);
+    secp256k1_gej_t iac; secp256k1_gej_add_ge(&iac, &ij, &a);
+
+    CHECK(secp256k1_gej_is_infinity(&an));
+    CHECK(secp256k1_gej_is_infinity(&anj));
+    CHECK(secp256k1_gej_is_infinity(&anc));
+    gej_equals_gej(&aa, &aaj);
+    gej_equals_gej(&aa, &aac);
+    gej_equals_gej(&ab, &abj);
+    gej_equals_gej(&ab, &abc);
+    gej_equals_gej(&an, &anj);
+    gej_equals_gej(&an, &anc);
+    gej_equals_gej(&ia, &iaj);
+    gej_equals_gej(&ai, &aij);
+    gej_equals_gej(&ii, &iij);
+    ge_equals_gej(&a, &ai); /* a + infinity and infinity + a, one check per addition routine */
+    ge_equals_gej(&a, &aij);
+    ge_equals_gej(&a, &ia);
+    ge_equals_gej(&a, &iaj);
+    ge_equals_gej(&a, &iac);
+}
+
+/* Run the randomized group addition test 2000*count times. */
+void run_ge(void) {
+    int left = 2000*count;
+    while (left-- > 0) test_ge();
+}
+
+/***** ECMULT TESTS *****/
+
+void run_ecmult_chain(void) { /* iterate X = xn*X + gn*G 200*count times, then recompute X with one ecmult from the accumulated coefficients and compare */
+    /* random starting point A (on the curve) */
+    secp256k1_fe_t ax; secp256k1_fe_set_hex(&ax, "8b30bbe9ae2a990696b22f670709dff3727fd8bc04d3362c6c7bf458e2846004", 64);
+    secp256k1_fe_t ay; secp256k1_fe_set_hex(&ay, "a357ae915c4a65281309edf20504740f0eb3343990216b4f81063cb65f2f7e0f", 64);
+    secp256k1_gej_t a; secp256k1_gej_set_xy(&a, &ax, &ay);
+    /* two random initial factors xn and gn */
+    secp256k1_num_t xn;
+    secp256k1_num_set_hex(&xn, "84cc5452f7fde1edb4d38a8ce9b1b84ccef31f146e569be9705d357a42985407", 64);
+    secp256k1_num_t gn;
+    secp256k1_num_set_hex(&gn, "a1e58d22553dcd42b23980625d4c57a96e9323d42b3152e5ca2c3990edc7c9de", 64);
+    /* two small multipliers to be applied to xn and gn in every iteration: */
+    secp256k1_num_t xf;
+    secp256k1_num_set_hex(&xf, "1337", 4);
+    secp256k1_num_t gf;
+    secp256k1_num_set_hex(&gf, "7113", 4);
+    /* accumulators with the resulting coefficients to A and G (X starts as 1*A + 0*G) */
+    secp256k1_num_t ae;
+    secp256k1_num_set_int(&ae, 1);
+    secp256k1_num_t ge;
+    secp256k1_num_set_int(&ge, 0);
+    /* the point being computed */
+    secp256k1_gej_t x = a;
+    const secp256k1_num_t *order = &secp256k1_ge_consts->order;
+    for (int i=0; i<200*count; i++) {
+        /* in each iteration, compute X = xn*X + gn*G; */
+        secp256k1_ecmult(&x, &x, &xn, &gn);
+        /* also compute ae and ge: the actual accumulated factors for A and G */
+        /* if X was (ae*A+ge*G), xn*X + gn*G results in (xn*ae*A + (xn*ge+gn)*G) */
+        secp256k1_num_mod_mul(&ae, &ae, &xn, order);
+        secp256k1_num_mod_mul(&ge, &ge, &xn, order);
+        secp256k1_num_add(&ge, &ge, &gn);
+        secp256k1_num_mod(&ge, order);
+        /* modify xn and gn */
+        secp256k1_num_mod_mul(&xn, &xn, &xf, order);
+        secp256k1_num_mod_mul(&gn, &gn, &gf, order);
+
+        /* verify: after 20000 iterations X must equal a hard-coded point (only reached when 200*count >= 20000, i.e. count >= 100) */
+        if (i == 19999) {
+            char res[132]; int resl = 132;
+            secp256k1_gej_get_hex(res, &resl, &x);
+            CHECK(strcmp(res, "(D6E96687F9B10D092A6F35439D86CEBEA4535D0D409F53586440BD74B933E830,B95CBCA2C77DA786539BE8FD53354D2D3B4F566AE658045407ED6015EE1B2A88)") == 0);
+        }
+    }
+    /* redo the computation, but directly with the resulting ae and ge coefficients: */
+    secp256k1_gej_t x2; secp256k1_ecmult(&x2, &a, &ae, &ge);
+    char res[132]; int resl = 132;
+    char res2[132]; int resl2 = 132;
+    secp256k1_gej_get_hex(res, &resl, &x);
+    secp256k1_gej_get_hex(res2, &resl2, &x2);
+    CHECK(strcmp(res, res2) == 0);
+    CHECK(strlen(res) == 131); /* same length as the expected string above: "(" + 64 hex digits + "," + 64 hex digits + ")" */
+}
+
+/* CHECK that order*point + order*G is the point at infinity, where order is
+ * secp256k1_ge_consts->order and G is the generator used by secp256k1_ecmult. */
+void test_point_times_order(const secp256k1_gej_t *point) {
+    const secp256k1_num_t *order = &secp256k1_ge_consts->order;
+    secp256k1_gej_t res;
+    /* calc res = order * point + order * G; */
+    secp256k1_ecmult(&res, point, order, order);
+    CHECK(secp256k1_gej_is_infinity(&res));
+}
+
+/* Starting from x = 2 and squaring x 500 times, run test_point_times_order on every point that
+ * secp256k1_ge_set_xo builds from x, then compare the final x with a known constant. */
+void run_point_times_order(void) {
+    secp256k1_fe_t coord;
+    char hex[65]; int hexlen = 65;
+    secp256k1_fe_set_hex(&coord, "02", 2);
+    for (int step = 0; step < 500; step++, secp256k1_fe_sqr(&coord, &coord)) {
+        secp256k1_ge_t aff; secp256k1_gej_t jac;
+        if (!secp256k1_ge_set_xo(&aff, &coord, 1)) continue; /* set_xo rejected this x; just square and go on */
+        CHECK(secp256k1_ge_is_valid(&aff));
+        secp256k1_gej_set_ge(&jac, &aff);
+        CHECK(secp256k1_gej_is_valid(&jac));
+        test_point_times_order(&jac);
+    }
+    secp256k1_fe_get_hex(hex, &hexlen, &coord);
+    CHECK(strcmp(hex, "7603CB59B0EF6C63FE6084792A0C378CDB3233A80F8A9A09A877DEAD31B38C45") == 0);
+}
+
+/* Check that secp256k1_ecmult_wnaf(number, w) yields a valid width-w NAF: non-zero digits are odd, lie in
+ * [-(2^(w-1)-1), 2^(w-1)-1] and are at least w-1 zeroes apart; the digits must evaluate back to number. */
+void test_wnaf(const secp256k1_num_t *number, int w) {
+    secp256k1_num_t x, two, t;
+    secp256k1_num_set_int(&x, 0); secp256k1_num_set_int(&two, 2);
+    int wnaf[257];
+    int bits = secp256k1_ecmult_wnaf(wnaf, number, w);
+    int zeroes = -1; /* zero digits seen since the last non-zero digit; -1 until the first non-zero digit */
+    for (int i=bits-1; i>=0; i--) {
+        secp256k1_num_mul(&x, &x, &two);
+        int v = wnaf[i];
+        if (v) {
+            CHECK(zeroes == -1 || zeroes >= w-1); /* check that distance between non-zero elements is at least w-1 */
+            zeroes=0;
+            CHECK((v & 1) == 1); /* check non-zero elements are odd */
+            CHECK(v <= (1 << (w-1)) - 1); /* check range above */
+            CHECK(v >= -((1 << (w-1)) - 1)); /* check range below (symmetric; the old bound was 2 too loose) */
+        } else {
+            CHECK(zeroes != -1); /* check that no unnecessary zero padding exists */
+            zeroes++;
+        }
+        secp256k1_num_set_int(&t, v); secp256k1_num_add(&x, &x, &t); /* x = 2*x + digit */
+    }
+    CHECK(secp256k1_num_eq(&x, number)); /* check that wnaf represents number */
+}
+
+/* wNAF checks on count random numbers with window sizes 4..13; odd iterations use the negated number. */
+void run_wnaf(void) {
+    secp256k1_num_t n;
+    for (int i=0; i<count; i++) {
+        random_num_order(&n);
+        if (i % 2) secp256k1_num_negate(&n); /* was `i % 1`, which is always 0, so negative inputs were never tested */
+        test_wnaf(&n, 4+(i%10));
+    }
+}
+
+/* Sign msg with key, drawing a fresh random nonce for every attempt until secp256k1_ecdsa_sig_sign returns non-zero. */
+void random_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *key, const secp256k1_scalar_t *msg, int *recid) {
+    secp256k1_scalar_t k;
+    random_scalar_order_test(&k);
+    while (!secp256k1_ecdsa_sig_sign(sig, key, msg, &k, recid)) random_scalar_order_test(&k);
+}
+
+/* Sign a random message with a random key: the signature must verify for that message and
+ * must stop verifying once the message number is incremented by one. */
+void test_ecdsa_sign_verify(void) {
+    secp256k1_scalar_t digest, seckey;
+    secp256k1_gej_t pub_jac; secp256k1_ge_t pub_aff;
+    secp256k1_ecdsa_sig_t signature; secp256k1_num_t digest_num;
+    random_scalar_order_test(&digest);
+    random_scalar_order_test(&seckey);
+    secp256k1_ecmult_gen(&pub_jac, &seckey); secp256k1_ge_set_gej(&pub_aff, &pub_jac);
+    random_sign(&signature, &seckey, &digest, NULL);
+    secp256k1_scalar_get_num(&digest_num, &digest);
+    CHECK(secp256k1_ecdsa_sig_verify(&signature, &pub_aff, &digest_num));
+    secp256k1_num_inc(&digest_num); CHECK(!secp256k1_ecdsa_sig_verify(&signature, &pub_aff, &digest_num));
+}
+
+/* Run the sign/verify round trip 10*count times. */
+void run_ecdsa_sign_verify(void) {
+    int left = 10*count;
+    while (left-- > 0) test_ecdsa_sign_verify();
+}
+
+/* End-to-end test on byte-array keys and messages: public key creation, private key export/import,
+ * optional additive and multiplicative tweaks, sign/verify, and compact sign/recover. */
+void test_ecdsa_end_to_end(void) {
+    unsigned char privkey[32];
+    unsigned char message[32];
+    /* Generate a random key and message. */
+    {
+        secp256k1_num_t msg, key;
+        random_num_order_test(&msg);
+        random_num_order_test(&key);
+        secp256k1_num_get_bin(privkey, 32, &key);
+        secp256k1_num_get_bin(message, 32, &msg);
+    }
+
+    /* Construct and verify corresponding public key (the compressed flag is chosen at random). */
+    CHECK(secp256k1_ec_seckey_verify(privkey) == 1);
+    unsigned char pubkey[65]; int pubkeylen = 65;
+    CHECK(secp256k1_ec_pubkey_create(pubkey, &pubkeylen, privkey, secp256k1_rand32() % 2) == 1);
+    CHECK(secp256k1_ec_pubkey_verify(pubkey, pubkeylen));
+
+    /* Verify private key import and export. */
+    unsigned char seckey[300]; int seckeylen = 300;
+    CHECK(secp256k1_ec_privkey_export(privkey, seckey, &seckeylen, secp256k1_rand32() % 2) == 1);
+    unsigned char privkey2[32];
+    CHECK(secp256k1_ec_privkey_import(privkey2, seckey, seckeylen) == 1);
+    CHECK(memcmp(privkey, privkey2, 32) == 0);
+
+    /* Optionally tweak the keys using addition (one run in three); both tweaks must agree on success or failure. */
+    if (secp256k1_rand32() % 3 == 0) {
+        unsigned char rnd[32];
+        secp256k1_rand256_test(rnd);
+        int ret1 = secp256k1_ec_privkey_tweak_add(privkey, rnd);
+        int ret2 = secp256k1_ec_pubkey_tweak_add(pubkey, pubkeylen, rnd);
+        CHECK(ret1 == ret2);
+        if (ret1 == 0) return; /* tweak rejected consistently; the rest of the test is skipped */
+        unsigned char pubkey2[65]; int pubkeylen2 = 65;
+        CHECK(secp256k1_ec_pubkey_create(pubkey2, &pubkeylen2, privkey, pubkeylen == 33) == 1);
+        CHECK(memcmp(pubkey, pubkey2, pubkeylen) == 0);
+    }
+
+    /* Optionally tweak the keys using multiplication (one run in three); same checks as for addition. */
+    if (secp256k1_rand32() % 3 == 0) {
+        unsigned char rnd[32];
+        secp256k1_rand256_test(rnd);
+        int ret1 = secp256k1_ec_privkey_tweak_mul(privkey, rnd);
+        int ret2 = secp256k1_ec_pubkey_tweak_mul(pubkey, pubkeylen, rnd);
+        CHECK(ret1 == ret2);
+        if (ret1 == 0) return; /* tweak rejected consistently; the rest of the test is skipped */
+        unsigned char pubkey2[65]; int pubkeylen2 = 65;
+        CHECK(secp256k1_ec_pubkey_create(pubkey2, &pubkeylen2, privkey, pubkeylen == 33) == 1);
+        CHECK(memcmp(pubkey, pubkey2, pubkeylen) == 0);
+    }
+
+    /* Sign, retrying with a fresh random nonce until signing succeeds. */
+    unsigned char signature[72]; int signaturelen = 72;
+    while(1) {
+        unsigned char rnd[32];
+        secp256k1_rand256_test(rnd);
+        if (secp256k1_ecdsa_sign(message, 32, signature, &signaturelen, privkey, rnd) == 1) {
+            break;
+        }
+    }
+    /* Verify. */
+    CHECK(secp256k1_ecdsa_verify(message, 32, signature, signaturelen, pubkey, pubkeylen) == 1);
+    /* Destroy signature (change one of its last 20 bytes) and verify again. NOTE(review): the two rand32() calls below are evaluated in an unspecified order. */
+    signature[signaturelen - 1 - secp256k1_rand32() % 20] += 1 + (secp256k1_rand32() % 255);
+    CHECK(secp256k1_ecdsa_verify(message, 32, signature, signaturelen, pubkey, pubkeylen) != 1);
+
+    /* Compact sign, retrying with a fresh random nonce until signing succeeds. */
+    unsigned char csignature[64]; int recid = 0;
+    while(1) {
+        unsigned char rnd[32];
+        secp256k1_rand256_test(rnd);
+        if (secp256k1_ecdsa_sign_compact(message, 32, csignature, privkey, rnd, &recid) == 1) {
+            break;
+        }
+    }
+    /* Recover: the recovered key must match pubkey byte for byte, in the same (un)compressed form. */
+    unsigned char recpubkey[65]; int recpubkeylen = 0;
+    CHECK(secp256k1_ecdsa_recover_compact(message, 32, csignature, recpubkey, &recpubkeylen, pubkeylen == 33, recid) == 1);
+    CHECK(recpubkeylen == pubkeylen);
+    CHECK(memcmp(pubkey, recpubkey, pubkeylen) == 0);
+    /* Destroy signature (change one byte) and recover again: recovery must fail or yield a different key. */
+    csignature[secp256k1_rand32() % 64] += 1 + (secp256k1_rand32() % 255);
+    CHECK(secp256k1_ecdsa_recover_compact(message, 32, csignature, recpubkey, &recpubkeylen, pubkeylen == 33, recid) != 1 ||
+          memcmp(pubkey, recpubkey, pubkeylen) != 0);
+    CHECK(recpubkeylen == pubkeylen);
+}
+
+/* Run the end-to-end test 64*count times. */
+void run_ecdsa_end_to_end(void) {
+    int left = 64*count;
+    while (left-- > 0) test_ecdsa_end_to_end();
+}
+
+/* Compact recovery on a fixed signature made with the invalid secret key 0: of the four
+ * recovery ids 0..3, only recid 1 may succeed. */
+void test_ecdsa_infinity(void) {
+    const unsigned char msg32[32] = {
+        'T', 'h', 'i', 's', ' ', 'i', 's', ' ',
+        'a', ' ', 'v', 'e', 'r', 'y', ' ', 's',
+        'e', 'c', 'r', 'e', 't', ' ', 'm', 'e',
+        's', 's', 'a', 'g', 'e', '.', '.', '.'
+    };
+    /* Generated by signing the above message with nonce 'This is the nonce we will use...'
+     * and secret key 0 (which is not valid), resulting in recid 0. */
+    const unsigned char sig64[64] = {
+        0x67, 0xCB, 0x28, 0x5F, 0x9C, 0xD1, 0x94, 0xE8,
+        0x40, 0xD6, 0x29, 0x39, 0x7A, 0xF5, 0x56, 0x96,
+        0x62, 0xFD, 0xE4, 0x46, 0x49, 0x99, 0x59, 0x63,
+        0x17, 0x9A, 0x7D, 0xD1, 0x7B, 0xD2, 0x35, 0x32,
+        0x4B, 0x1B, 0x7D, 0xF3, 0x4C, 0xE1, 0xF6, 0x8E,
+        0x69, 0x4F, 0xF6, 0xF1, 0x1A, 0xC7, 0x51, 0xDD,
+        0x7D, 0xD7, 0x3E, 0x38, 0x7E, 0xE4, 0xFC, 0x86,
+        0x6E, 0x1B, 0xE8, 0xEC, 0xC7, 0xDD, 0x95, 0x57
+    };
+    unsigned char pubkey[65]; int pubkeylen = 65;
+    for (int recid = 0; recid < 4; recid++) {
+        CHECK((!secp256k1_ecdsa_recover_compact(msg32, 32, sig64, pubkey, &pubkeylen, 0, recid)) == (recid != 1));
+    }
+}
+
+void run_ecdsa_infinity(void) { /* runs the fixed-vector test once; unlike the other run_* drivers it does not loop over count */
+    test_ecdsa_infinity();
+}
+
+#ifdef ENABLE_OPENSSL_TESTS
+/* Build an OpenSSL EC_KEY for key: serialize it here (compressed flag chosen at random), parse the
+ * bytes with d2i_ECPrivateKey and run EC_KEY_check_key on the result. The key is returned to the caller. */
+EC_KEY *get_openssl_key(const secp256k1_scalar_t *key) {
+    unsigned char encoded[300]; int encodedlen;
+    const int compressed = secp256k1_rand32() & 1;
+    const unsigned char *cursor = encoded; /* d2i_ECPrivateKey takes a pointer to this read position */
+    EC_KEY *result = EC_KEY_new_by_curve_name(NID_secp256k1);
+    CHECK(secp256k1_eckey_privkey_serialize(encoded, &encodedlen, key, compressed));
+    CHECK(d2i_ECPrivateKey(&result, &cursor, encodedlen) && EC_KEY_check_key(result));
+    return result;
+}
+
+/* Cross-check against OpenSSL with a random key and message: a signature made by ECDSA_sign
+ * must parse and verify here (and stop verifying once r is incremented), and a signature
+ * made here must be accepted by ECDSA_verify. */
+void test_ecdsa_openssl(void) {
+    unsigned char digest[32], sigbuf[80];
+    unsigned int openssl_len = 80;
+    int own_len = 80;
+    secp256k1_scalar_t seckey, digest_scalar;
+    secp256k1_gej_t pub_jac; secp256k1_ge_t pub;
+    secp256k1_ecdsa_sig_t parsed; secp256k1_num_t digest_num;
+    EC_KEY *openssl_key;
+    secp256k1_rand256_test(digest);
+    secp256k1_scalar_set_b32(&digest_scalar, digest, NULL);
+    random_scalar_order_test(&seckey);
+    secp256k1_ecmult_gen(&pub_jac, &seckey); secp256k1_ge_set_gej(&pub, &pub_jac);
+    openssl_key = get_openssl_key(&seckey);
+    CHECK(openssl_key);
+    /* OpenSSL signs, this library verifies. */
+    CHECK(ECDSA_sign(0, digest, sizeof(digest), sigbuf, &openssl_len, openssl_key));
+    CHECK(secp256k1_ecdsa_sig_parse(&parsed, sigbuf, openssl_len));
+    secp256k1_scalar_get_num(&digest_num, &digest_scalar);
+    CHECK(secp256k1_ecdsa_sig_verify(&parsed, &pub, &digest_num));
+    secp256k1_num_inc(&parsed.r); CHECK(!secp256k1_ecdsa_sig_verify(&parsed, &pub, &digest_num));
+
+    /* This library signs, OpenSSL verifies. */
+    random_sign(&parsed, &seckey, &digest_scalar, NULL);
+    CHECK(secp256k1_ecdsa_sig_serialize(sigbuf, &own_len, &parsed));
+    CHECK(ECDSA_verify(0, digest, sizeof(digest), sigbuf, own_len, openssl_key) == 1);
+    EC_KEY_free(openssl_key);
+}
+
+/* Run the OpenSSL cross-check 10*count times. */
+void run_ecdsa_openssl(void) {
+    int left = 10*count;
+    while (left-- > 0) test_ecdsa_openssl();
+}
+#endif
+
+/* Test driver. argv[1] (optional): iteration count. argv[2] (optional): random seed; without it the
+ * seed is read from /dev/urandom, falling back to time(NULL) * 1337. Returns 0; failed checks abort(). */
+int main(int argc, char **argv) {
+    /* find iteration count */
+    if (argc > 1) {
+        count = strtol(argv[1], NULL, 0);
+    }
+
+    /* find random seed */
+    uint64_t seed;
+    if (argc > 2) {
+        seed = strtoull(argv[2], NULL, 0);
+    } else {
+        FILE *frand = fopen("/dev/urandom", "r");
+        if (!frand || !fread(&seed, sizeof(seed), 1, frand)) {
+            seed = time(NULL) * 1337;
+        }
+        if (frand) { fclose(frand); } /* fopen may have failed; fclose must not be given NULL */
+    }
+    secp256k1_rand_seed(seed);
+
+    printf("test count = %i\n", count);
+    printf("random seed = %llu\n", (unsigned long long)seed);
+
+    /* initialize */
+    secp256k1_start(SECP256K1_START_SIGN | SECP256K1_START_VERIFY);
+
+    /* num tests */
+    run_num_smalltests();
+    /* scalar tests */
+    run_scalar_tests();
+    /* field tests */
+    run_field_inv();
+    run_field_inv_var();
+    run_field_inv_all();
+    run_field_inv_all_var();
+    run_sqr();
+    run_sqrt();
+
+    /* group tests */
+    run_ge();
+
+    /* ecmult tests */
+    run_wnaf();
+    run_point_times_order();
+    run_ecmult_chain();
+
+    /* ecdsa tests */
+    run_ecdsa_sign_verify();
+    run_ecdsa_end_to_end();
+    run_ecdsa_infinity();
+#ifdef ENABLE_OPENSSL_TESTS
+    run_ecdsa_openssl();
+#endif
+
+    printf("random run = %llu\n", (unsigned long long)secp256k1_rand32() + ((unsigned long long)secp256k1_rand32() << 32));
+
+    /* shutdown */
+    secp256k1_stop();
+    return 0;
+}
diff --git a/src/util.h b/src/util.h
new file mode 100644
index 0000000000..96b47057c0
--- /dev/null
+++ b/src/util.h
@@ -0,0 +1,64 @@
+/**********************************************************************
+ * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_UTIL_H_
+#define _SECP256K1_UTIL_H_
+
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef DETERMINISTIC /* TEST_FAILURE(msg): print msg to stderr and abort(); this variant leaves out __FILE__ and __LINE__ */
+#define TEST_FAILURE(msg) do { \
+    fprintf(stderr, "%s\n", msg); \
+    abort(); \
+} while(0)
+#else /* default variant: prefix the message with the file name and line number */
+#define TEST_FAILURE(msg) do { \
+    fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, msg); \
+    abort(); \
+} while(0)
+#endif /* neither variant ends in ';', so TEST_FAILURE(x); is one statement, even right before an else */
+
+#ifdef HAVE_BUILTIN_EXPECT /* use __builtin_expect only when the build reports it as available (the test was inverted) */
+#define EXPECT(x,c) __builtin_expect((x),(c))
+#else /* fallback: EXPECT(x,c) is just x; the expected value c is ignored */
+#define EXPECT(x,c) (x)
+#endif
+
+#ifdef DETERMINISTIC /* CHECK(cond): call TEST_FAILURE when cond is false; this variant uses a fixed message */
+#define CHECK(cond) do { \
+    if (EXPECT(!(cond), 0)) { \
+        TEST_FAILURE("test condition failed"); \
+    } \
+} while(0)
+#else /* default variant: the message also contains the stringified condition */
+#define CHECK(cond) do { \
+    if (EXPECT(!(cond), 0)) { \
+        TEST_FAILURE("test condition failed: " #cond); \
+    } \
+} while(0)
+#endif
+
+/* Like assert(), but cond is always evaluated: with NDEBUG defined only the check is dropped, so side effects in cond still happen. */
+#ifndef NDEBUG
+#define DEBUG_CHECK CHECK
+#else
+#define DEBUG_CHECK(cond) do { (void)(cond); } while(0)
+#endif
+
+/* Like DEBUG_CHECK(), but enabled by defining VERIFY rather than by leaving NDEBUG undefined; cond is evaluated either way. */
+#ifdef VERIFY
+#define VERIFY_CHECK CHECK
+#else
+#define VERIFY_CHECK(cond) do { (void)(cond); } while(0)
+#endif
+
+#endif