Litecoin: Runtime detection of SSE2 32bit x86 for Scrypt

* x86_64 and Intel Mac OS X always use scrypt-sse2; non-x86 uses scrypt-generic.
* 32-bit x86 (not Mac) detects CPUID features and chooses sse2 or generic at runtime.

How to Build with SSE2 Support
==============================
* make litecoind with USE_SSE2=1
* qmake with USE_SSE2=1
This commit is contained in:
Warren Togami 2013-09-14 05:19:30 -10:00
parent 4b82dceb7b
commit cd8eeccc2d
12 changed files with 128 additions and 21 deletions

View File

@ -316,6 +316,11 @@ DEFINES += BITCOIN_QT_TEST
macx: CONFIG -= app_bundle
}
# Optional SSE2 scrypt: when qmake is invoked with USE_SSE2=1, compile
# src/scrypt-sse2.cpp into the build and define USE_SSE2 for all sources.
# NOTE(review): no -msse2 flag is added here, whereas the makefile builds pass
# -msse2 when compiling scrypt-sse2 objects; confirm the target toolchain
# enables SSE2 intrinsics by default on 32-bit x86.
contains(USE_SSE2, 1) {
SOURCES += src/scrypt-sse2.cpp
DEFINES += USE_SSE2
}
# Todo: Remove this line when switching to Qt5, as that option was removed
CODECFORTR = UTF-8

View File

@ -51,7 +51,7 @@ script: |
export LD_PRELOAD=/usr/lib/faketime/libfaketime.so.1
export FAKETIME=$REFERENCE_DATETIME
export TZ=UTC
$HOME/qt/src/bin/qmake -spec unsupported/win32-g++-cross MINIUPNPC_LIB_PATH=$HOME/build/miniupnpc MINIUPNPC_INCLUDE_PATH=$HOME/build/ BDB_LIB_PATH=$HOME/build/db-4.8.30.NC/build_unix BDB_INCLUDE_PATH=$HOME/build/db-4.8.30.NC/build_unix BOOST_LIB_PATH=$HOME/build/boost_1_50_0/stage/lib BOOST_INCLUDE_PATH=$HOME/build/boost_1_50_0 BOOST_LIB_SUFFIX=-mt-s BOOST_THREAD_LIB_SUFFIX=_win32-mt-s OPENSSL_LIB_PATH=$HOME/build/openssl-1.0.1c OPENSSL_INCLUDE_PATH=$HOME/build/openssl-1.0.1c/include QRENCODE_LIB_PATH=$HOME/build/qrencode-3.2.0/.libs QRENCODE_INCLUDE_PATH=$HOME/build/qrencode-3.2.0 USE_QRCODE=1 INCLUDEPATH=$HOME/build DEFINES=BOOST_THREAD_USE_LIB BITCOIN_NEED_QT_PLUGINS=1 QMAKE_LRELEASE=lrelease QMAKE_CXXFLAGS=-frandom-seed=litecoin USE_BUILD_INFO=1
$HOME/qt/src/bin/qmake -spec unsupported/win32-g++-cross MINIUPNPC_LIB_PATH=$HOME/build/miniupnpc MINIUPNPC_INCLUDE_PATH=$HOME/build/ BDB_LIB_PATH=$HOME/build/db-4.8.30.NC/build_unix BDB_INCLUDE_PATH=$HOME/build/db-4.8.30.NC/build_unix BOOST_LIB_PATH=$HOME/build/boost_1_50_0/stage/lib BOOST_INCLUDE_PATH=$HOME/build/boost_1_50_0 BOOST_LIB_SUFFIX=-mt-s BOOST_THREAD_LIB_SUFFIX=_win32-mt-s OPENSSL_LIB_PATH=$HOME/build/openssl-1.0.1c OPENSSL_INCLUDE_PATH=$HOME/build/openssl-1.0.1c/include QRENCODE_LIB_PATH=$HOME/build/qrencode-3.2.0/.libs QRENCODE_INCLUDE_PATH=$HOME/build/qrencode-3.2.0 USE_QRCODE=1 INCLUDEPATH=$HOME/build DEFINES=BOOST_THREAD_USE_LIB BITCOIN_NEED_QT_PLUGINS=1 QMAKE_LRELEASE=lrelease QMAKE_CXXFLAGS=-frandom-seed=litecoin USE_BUILD_INFO=1 USE_SSE2=1
make $MAKEOPTS
i586-mingw32msvc-strip release/litecoin-qt.exe
cp release/litecoin-qt.exe $OUTDIR/
@ -60,7 +60,7 @@ script: |
export LD_PRELOAD=/usr/lib/faketime/libfaketime.so.1
export FAKETIME=$REFERENCE_DATETIME
export TZ=UTC
make -f makefile.linux-mingw $MAKEOPTS DEPSDIR=$HOME/build litecoind.exe USE_UPNP=0 DEBUGFLAGS="-frandom-seed=litecoin"
make -f makefile.linux-mingw $MAKEOPTS DEPSDIR=$HOME/build litecoind.exe USE_UPNP=0 DEBUGFLAGS="-frandom-seed=litecoin" USE_SSE2=1
i586-mingw32msvc-strip litecoind.exe
mkdir $OUTDIR/daemon
cp litecoind.exe $OUTDIR/daemon

View File

@ -46,10 +46,10 @@ script: |
cp $OUTDIR/src/doc/README.md $OUTDIR
cp $OUTDIR/src/COPYING $OUTDIR
cd src
make -f makefile.unix STATIC=1 OPENSSL_INCLUDE_PATH="$INSTDIR/include" OPENSSL_LIB_PATH="$INSTDIR/lib" $MAKEOPTS litecoind USE_UPNP=0 DEBUGFLAGS=
make -f makefile.unix STATIC=1 OPENSSL_INCLUDE_PATH="$INSTDIR/include" OPENSSL_LIB_PATH="$INSTDIR/lib" $MAKEOPTS litecoind USE_UPNP=0 DEBUGFLAGS= USE_SSE2=1
mkdir -p $OUTDIR/bin/$GBUILD_BITS
install -s litecoind $OUTDIR/bin/$GBUILD_BITS
cd ..
qmake INCLUDEPATH="$INSTDIR/include" LIBS="-L$INSTDIR/lib" RELEASE=1 USE_QRCODE=1
qmake INCLUDEPATH="$INSTDIR/include" LIBS="-L$INSTDIR/lib" RELEASE=1 USE_QRCODE=1 USE_SSE2=1
make $MAKEOPTS
install litecoin-qt $OUTDIR/bin/$GBUILD_BITS

View File

@ -22,6 +22,18 @@
#include <signal.h>
#endif
#if defined(USE_SSE2)
#if !defined(MAC_OSX) && (defined(_M_IX86) || defined(__i386__) || defined(__i386))
#ifdef _MSC_VER
// MSVC 64bit is unable to use inline asm
#include <intrin.h>
#else
// GCC Linux or i686-w64-mingw32
#include <cpuid.h>
#endif
#endif
#endif
using namespace std;
using namespace boost;
@ -494,6 +506,23 @@ bool AppInit2(boost::thread_group& threadGroup)
sigaction(SIGHUP, &sa_hup, NULL);
#endif
#if defined(USE_SSE2)
    // EDX feature flags from CPUID leaf 1. Stays 0 unless the 32-bit x86
    // detection path below fills it in; it is passed to scrypt_detect_sse2()
    // later in AppInit2.
    unsigned int cpuid_edx = 0;
#if !defined(MAC_OSX) && (defined(_M_IX86) || defined(__i386__) || defined(__i386))
    // 32bit x86 Linux or Windows, detect cpuid features
#if defined(_MSC_VER)
    // MSVC: __cpuid() fills the array with EAX, EBX, ECX, EDX (indices 0..3).
    int x86cpuid[4] = {0, 0, 0, 0};
    __cpuid(x86cpuid, 1);
    // Read EDX from the array that was actually filled; the previous code
    // referenced an undeclared identifier here and could not compile.
    cpuid_edx = static_cast<unsigned int>(x86cpuid[3]);
#else
    // Linux or i686-w64-mingw32 (gcc-4.6.3)
    // NOTE(review): the return value is ignored; presumably cpuid_edx keeps
    // its initial 0 if leaf 1 is unsupported - confirm against <cpuid.h>.
    unsigned int eax, ebx, ecx;
    __get_cpuid(1, &eax, &ebx, &ecx, &cpuid_edx);
#endif
#endif
#endif
// ********************************************************* Step 2: parameter interactions
fTestNet = GetBoolArg("-testnet");
@ -808,6 +837,10 @@ bool AppInit2(boost::thread_group& threadGroup)
// ********************************************************* Step 7: load block chain
#if defined(USE_SSE2)
scrypt_detect_sse2(cpuid_edx);
#endif
fReindex = GetBoolArg("-reindex");
// Upgrading to 0.8; hard-link the old blknnnn.dat files into /blocks/

View File

@ -2234,6 +2234,9 @@ struct CBlockTemplate
std::vector<int64_t> vTxSigOps;
};
// CPUID leaf-1 EDX feature flags, declared for x86 / x86_64 builds only.
// NOTE(review): the only cpuid_edx visible in this change is a local variable
// inside AppInit2; confirm a namespace-scope definition exists, otherwise any
// use of this declaration will fail at link time.
#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_X64) || defined(__x86_64__) || defined(_M_AMD64)
extern unsigned int cpuid_edx;
#endif

View File

@ -94,6 +94,12 @@ OBJS= \
obj/leveldb.o \
obj/txdb.o
# Optional SSE2 scrypt implementation: defines USE_SSE2 for the compiler and
# adds obj/scrypt-sse2.o to the link.
# Note: GNU make's ifdef is true for any non-empty value, so USE_SSE2=0 also
# enables this; leave the variable unset to disable.
ifdef USE_SSE2
DEFS += -DUSE_SSE2
OBJS_SSE2= obj/scrypt-sse2.o
OBJS += $(OBJS_SSE2)
endif
all: litecoind.exe
DEFS += -I"$(CURDIR)/leveldb/include"
@ -106,6 +112,9 @@ obj/build.h: FORCE
version.cpp: obj/build.h
DEFS += -DHAVE_BUILD_INFO
# SSE2 objects: only *-sse2.cpp sources are compiled with -msse2.
# Listed before the generic obj/%.o rule, which also matches these targets.
# Depends on $(HEADERS) like the generic rule so header edits trigger a rebuild.
obj/%-sse2.o: %-sse2.cpp $(HEADERS)
	$(CXX) -c $(xCXXFLAGS) -msse2 -o $@ $<
obj/%.o: %.cpp $(HEADERS)
$(CXX) -c $(xCXXFLAGS) -o $@ $<

View File

@ -102,6 +102,11 @@ OBJS= \
obj/leveldb.o \
obj/txdb.o
# Optional SSE2 scrypt implementation: defines USE_SSE2 for the compiler and
# adds obj/scrypt-sse2.o to the link.
# Note: GNU make's ifdef is true for any non-empty value, so USE_SSE2=0 also
# enables this; leave the variable unset to disable.
ifdef USE_SSE2
DEFS += -DUSE_SSE2
OBJS_SSE2= obj/scrypt-sse2.o
OBJS += $(OBJS_SSE2)
endif
all: litecoind.exe
@ -117,6 +122,9 @@ DEFS += $(addprefix -I,$(CURDIR)/leveldb/helpers)
leveldb/libleveldb.a:
cd leveldb && $(MAKE) CC=$(CC) CXX=$(CXX) OPT="$(CFLAGS)" TARGET_OS=NATIVE_WINDOWS libleveldb.a libmemenv.a && cd ..
# SSE2 objects: only *-sse2.cpp sources are compiled with -msse2.
# Listed before the generic obj/%.o rule, which also matches these targets.
# Depends on $(HEADERS) like the generic rule so header edits trigger a rebuild.
obj/%-sse2.o: %-sse2.cpp $(HEADERS)
	$(CXX) -c $(CFLAGS) -msse2 -o $@ $<
obj/%.o: %.cpp $(HEADERS)
$(CXX) -c $(CFLAGS) -o $@ $<

View File

@ -105,6 +105,12 @@ OBJS= \
obj/leveldb.o \
obj/txdb.o
# Optional SSE2 scrypt implementation: defines USE_SSE2 for the compiler and
# adds obj/scrypt-sse2.o to the link.
# Note: GNU make's ifdef is true for any non-empty value, so USE_SSE2=0 also
# enables this; leave the variable unset to disable.
ifdef USE_SSE2
DEFS += -DUSE_SSE2
OBJS_SSE2= obj/scrypt-sse2.o
OBJS += $(OBJS_SSE2)
endif
ifndef USE_UPNP
override USE_UPNP = -
endif
@ -144,6 +150,13 @@ obj/build.h: FORCE
version.cpp: obj/build.h
DEFS += -DHAVE_BUILD_INFO
# SSE2 objects: same recipe as the generic obj/%.o rule plus -msse2.
# -MMD -MF writes a .d dependency file next to the object. The recipe copies it
# to .P, then appends a second copy in which sed has stripped comments, the
# "target:" prefix and trailing backslashes, dropped empty lines, and added
# " :" to every remaining line, so each prerequisite also becomes a target
# with no recipe. The temporary .d file is removed afterwards.
obj/%-sse2.o: %-sse2.cpp
$(CXX) -c $(CFLAGS) -msse2 -MMD -MF $(@:%.o=%.d) -o $@ $<
@cp $(@:%.o=%.d) $(@:%.o=%.P); \
sed -e 's/#.*//' -e 's/^[^:]*: *//' -e 's/ *\\$$//' \
-e '/^$$/ d' -e 's/$$/ :/' < $(@:%.o=%.d) >> $(@:%.o=%.P); \
rm -f $(@:%.o=%.d)
obj/%.o: %.cpp
$(CXX) -c $(CFLAGS) -MMD -MF $(@:%.o=%.d) -o $@ $<
@cp $(@:%.o=%.d) $(@:%.o=%.P); \

View File

@ -144,10 +144,10 @@ OBJS= \
obj/leveldb.o \
obj/txdb.o
OBJS_SSE2= obj/scrypt-sse2.o
ifdef SSE2
ifdef USE_SSE2
DEFS += -DUSE_SSE2
OBJS_SSE2= obj/scrypt-sse2.o
OBJS += $(OBJS_SSE2)
endif

View File

@ -26,7 +26,7 @@
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#ifdef __SSE2__
#include "scrypt.h"
#include <stdlib.h>
#include <stdint.h>
@ -91,10 +91,9 @@ static inline void xor_salsa8_sse2(__m128i B[4], const __m128i Bx[4])
B[2] = _mm_add_epi32(B[2], X2);
B[3] = _mm_add_epi32(B[3], X3);
}
#endif
void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad)
{
#ifdef __SSE2__
uint8_t B[128];
union {
__m128i i128[8];
@ -134,5 +133,4 @@ void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchp
}
PBKDF2_SHA256((const uint8_t *)input, 80, B, 128, 1, (uint8_t *)output, 32);
#endif
}

View File

@ -28,6 +28,7 @@
*/
#include "scrypt.h"
#include "util.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@ -242,7 +243,7 @@ static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
B[15] += x15;
}
void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad)
void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scratchpad)
{
uint8_t B[128];
uint32_t X[32];
@ -275,14 +276,48 @@ void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad)
PBKDF2_SHA256((const uint8_t *)input, 80, B, 128, 1, (uint8_t *)output, 32);
}
#if defined(USE_SSE2)
#if defined(_M_X64) || defined(__x86_64__) || defined(_M_AMD64) || (defined(MAC_OSX) && defined(__i386__))
/*
 * Always SSE2 (x86_64 or Intel Mac OS X): nothing to detect, so the CPUID
 * flags are ignored and only the chosen implementation is logged.
 */
void scrypt_detect_sse2(unsigned int cpuid_edx)
{
    (void)cpuid_edx; /* unused on targets where SSE2 is assumed */
    printf("scrypt: using scrypt-sse2 as built.\n");
}
#else
/*
 * Detect SSE2 at runtime (32-bit x86 Linux or Windows).
 *
 * scrypt_1024_1_1_256_sp is the implementation scrypt_1024_1_1_256() calls
 * through. It defaults to the generic implementation so that hashing before
 * scrypt_detect_sse2() has run (or in a program that never calls it) does not
 * jump through a null pointer.
 */
void (*scrypt_1024_1_1_256_sp)(const char *input, char *output, char *scratchpad) =
    &scrypt_1024_1_1_256_sp_generic;

/*
 * Select the scrypt implementation from the CPUID leaf-1 EDX feature flags.
 *
 * cpuid_edx: EDX value returned by CPUID leaf 1; bit 26 reports SSE2 support.
 */
void scrypt_detect_sse2(unsigned int cpuid_edx)
{
    const unsigned int kSse2Bit = 1u << 26;

    if (cpuid_edx & kSse2Bit)
    {
        scrypt_1024_1_1_256_sp = &scrypt_1024_1_1_256_sp_sse2;
        printf("scrypt: using scrypt-sse2 as detected.\n");
    }
    else
    {
        scrypt_1024_1_1_256_sp = &scrypt_1024_1_1_256_sp_generic;
        printf("scrypt: using scrypt-generic, SSE2 unavailable.\n");
    }
}
#endif
#endif
void scrypt_1024_1_1_256(const char *input, char *output)
{
char scratchpad[SCRYPT_SCRATCHPAD_SIZE];
#ifdef USE_SSE2
// todo: runtime detection at startup and use function pointer
if(1)
scrypt_1024_1_1_256_sp_sse2(input, output, scratchpad);
else
#if defined(USE_SSE2)
// Detection would work, but in cases where we KNOW it always has SSE2,
// it is faster to use directly than to use a function pointer or conditional.
#if defined(_M_X64) || defined(__x86_64__) || defined(_M_AMD64) || (defined(MAC_OSX) && defined(__i386__))
// Always SSE2: x86_64 or Intel MacOS X
scrypt_1024_1_1_256_sp_sse2(input, output, scratchpad);
#else
// Detect SSE2: 32bit x86 Linux or Windows
scrypt_1024_1_1_256_sp(input, output, scratchpad);
#endif
#else
// Generic scrypt
scrypt_1024_1_1_256_sp_generic(input, output, scratchpad);
#endif
scrypt_1024_1_1_256_sp(input, output, scratchpad);
}

View File

@ -4,9 +4,14 @@
#include <stdint.h>
static const int SCRYPT_SCRATCHPAD_SIZE = 131072 + 63;
void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad);
void scrypt_1024_1_1_256_sp(const char *input, char *output, char *scratchpad);
void scrypt_1024_1_1_256(const char *input, char *output);
void scrypt_1024_1_1_256_sp_generic(const char *input, char *output, char *scratchpad);
#if defined(USE_SSE2)
extern void scrypt_detect_sse2(unsigned int cpuid_edx);
void scrypt_1024_1_1_256_sp_sse2(const char *input, char *output, char *scratchpad);
extern void (*scrypt_1024_1_1_256_sp)(const char *input, char *output, char *scratchpad);
#endif
void
PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen, const uint8_t *salt,
@ -27,6 +32,4 @@ static inline void le32enc(void *pp, uint32_t x)
p[2] = (x >> 16) & 0xff;
p[3] = (x >> 24) & 0xff;
}
#endif