From 982709199f1b4e9e35211c419a81938f9f1dd4ed Mon Sep 17 00:00:00 2001 From: MarcoFalke Date: Wed, 2 Dec 2015 12:26:24 +0100 Subject: [PATCH] Squashed 'src/univalue/' changes from 5839ac3..2740c4f 2740c4f Merge branch '2015_11_escape_plan' into bitcoin 7482163 Add new testcase to Makefile.am 46098ee Version 1.0.1. ccf3575 parser: Ensure multiple values cannot follow each other eb6cd64 Omit Obj/Arr open token from jsonTokenIsValue() test bfef9e2 Makefile.am: list recently added test data, fail{35,36}.json 3e319f3 parser: Tighten array, object syntax checks. c74185c parser: transform C++ variables into bitmask f2568bc Prefer C++ STL vector .at() for accessing object values. 8eafa26 travis: run parallel 'make distcheck' fd448da test: Improve tester diagnostics. Add failing test case from #15 2158205 Use internal, locale-independent isspace(), isdigit() implementations. 2ab9ad4 travis: Make 'make distcheck' for more comprehensive checks. 3339191 Escape all control characters git-subtree-dir: src/univalue git-subtree-split: 2740c4f71242086a7eb3dc32f812546ba9fad913 --- .gitignore | 1 + .travis.yml | 2 +- Makefile.am | 6 ++- configure.ac | 2 +- gen/gen.cpp | 15 ++++-- include/univalue.h | 35 +++++++++++++- lib/univalue.cpp | 21 +++----- lib/univalue_escapes.h | 56 ++++++++++----------- lib/univalue_read.cpp | 107 ++++++++++++++++++++++++++++++++--------- lib/univalue_write.cpp | 7 ++- test/.gitignore | 3 ++ test/fail35.json | 1 + test/fail36.json | 1 + test/fail37.json | 1 + test/round1.json | 1 + test/unitester.cpp | 29 ++++++++--- 16 files changed, 205 insertions(+), 83 deletions(-) create mode 100644 test/fail35.json create mode 100644 test/fail36.json create mode 100644 test/fail37.json create mode 100644 test/round1.json diff --git a/.gitignore b/.gitignore index a7a2ca919..19e42f814 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ univalue-config.h* test-driver libtool ltmain.sh +test-suite.log *.a *.la diff --git a/.travis.yml b/.travis.yml index af632c78d..d318d9cc8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,7 +36,7 @@ script: - ./configure --cache-file=config.cache $UNIVALUE_CONFIG_ALL $UNIVALUE_CONFIG || ( cat config.log && false) - make -s $MAKEJOBS $GOAL || ( echo "Build failure. Verbose build follows." && make $GOAL ; false ) - export LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/depends/$HOST/lib - - if [ "$RUN_TESTS" = "true" ]; then make check; fi + - if [ "$RUN_TESTS" = "true" ]; then make $MAKEJOBS distcheck; fi matrix: fast_finish: true diff --git a/Makefile.am b/Makefile.am index df9e66229..34fe9e3f1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -70,6 +70,9 @@ TEST_FILES = \ $(TEST_DATA_DIR)/fail32.json \ $(TEST_DATA_DIR)/fail33.json \ $(TEST_DATA_DIR)/fail34.json \ + $(TEST_DATA_DIR)/fail35.json \ + $(TEST_DATA_DIR)/fail36.json \ + $(TEST_DATA_DIR)/fail37.json \ $(TEST_DATA_DIR)/fail3.json \ $(TEST_DATA_DIR)/fail4.json \ $(TEST_DATA_DIR)/fail5.json \ @@ -79,6 +82,7 @@ TEST_FILES = \ $(TEST_DATA_DIR)/fail9.json \ $(TEST_DATA_DIR)/pass1.json \ $(TEST_DATA_DIR)/pass2.json \ - $(TEST_DATA_DIR)/pass3.json + $(TEST_DATA_DIR)/pass3.json \ + $(TEST_DATA_DIR)/round1.json EXTRA_DIST=$(TEST_FILES) $(GEN_SRCS) diff --git a/configure.ac b/configure.ac index 6cd951622..0515b632b 100644 --- a/configure.ac +++ b/configure.ac @@ -14,7 +14,7 @@ m4_define([libunivalue_age], [m4_eval(libunivalue_binary_age - libunivalue_inter m4_define([libunivalue_version], [libunivalue_major_version().libunivalue_minor_version().libunivalue_micro_version()libunivalue_extraversion()]) -AC_INIT([univalue], [1.0.0], +AC_INIT([univalue], [1.0.1], [http://github.com/jgarzik/univalue/]) dnl make the compilation flags quiet unless V=1 is used diff --git a/gen/gen.cpp b/gen/gen.cpp index 5e5a4d4ae..17f361941 100644 --- a/gen/gen.cpp +++ b/gen/gen.cpp @@ -8,7 +8,6 @@ // $ ./gen > univalue_escapes.h // -#include #include #include #include "univalue.h" @@ -16,10 +15,17 @@ using namespace std; static bool initEscapes; -static const char *escapes[256]; +static std::string escapes[256]; static void initJsonEscape() { + // Escape all lower control characters (some get overridden with smaller sequences below) + for (int ch=0x00; ch<0x20; ++ch) { + char tmpbuf[20]; + snprintf(tmpbuf, sizeof(tmpbuf), "\\u%04x", ch); + escapes[ch] = std::string(tmpbuf); + } + escapes[(int)'"'] = "\\\""; escapes[(int)'\\'] = "\\\\"; escapes[(int)'\b'] = "\\b"; @@ -27,6 +33,7 @@ static void initJsonEscape() escapes[(int)'\n'] = "\\n"; escapes[(int)'\r'] = "\\r"; escapes[(int)'\t'] = "\\t"; + escapes[(int)'\x7f'] = "\\u007f"; // U+007F DELETE initEscapes = true; } @@ -39,13 +46,13 @@ static void outputEscape() "static const char *escapes[256] = {\n"); for (unsigned int i = 0; i < 256; i++) { - if (!escapes[i]) { + if (escapes[i].empty()) { printf("\tNULL,\n"); } else { printf("\t\""); unsigned int si; - for (si = 0; si < strlen(escapes[i]); si++) { + for (si = 0; si < escapes[i].size(); si++) { char ch = escapes[i][si]; switch (ch) { case '"': diff --git a/include/univalue.h b/include/univalue.h index ac0511601..8428b1c68 100644 --- a/include/univalue.h +++ b/include/univalue.h @@ -243,8 +243,41 @@ extern enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed, const char *raw); extern const char *uvTypeName(UniValue::VType t); +static inline bool jsonTokenIsValue(enum jtokentype jtt) +{ + switch (jtt) { + case JTOK_KW_NULL: + case JTOK_KW_TRUE: + case JTOK_KW_FALSE: + case JTOK_NUMBER: + case JTOK_STRING: + return true; + + default: + return false; + } + + // not reached +} + +static inline bool json_isspace(int ch) +{ + switch (ch) { + case 0x20: + case 0x09: + case 0x0a: + case 0x0d: + return true; + + default: + return false; + } + + // not reached +} + extern const UniValue NullUniValue; const UniValue& find_value( const UniValue& obj, const std::string& name); -#endif // __UNIVALUE_H__ \ No newline at end of file +#endif // __UNIVALUE_H__ diff --git a/lib/univalue.cpp b/lib/univalue.cpp index 883e8651f..0076d6678 100644 --- a/lib/univalue.cpp +++ b/lib/univalue.cpp @@ -4,7 +4,6 @@ // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include -#include #include #include #include @@ -21,7 +20,7 @@ static bool ParsePrechecks(const std::string& str) { if (str.empty()) // No empty string allowed return false; - if (str.size() >= 1 && (isspace(str[0]) || isspace(str[str.size()-1]))) // No padding allowed + if (str.size() >= 1 && (json_isspace(str[0]) || json_isspace(str[str.size()-1]))) // No padding allowed return false; if (str.size() != strlen(str.c_str())) // No embedded NUL characters allowed return false; @@ -210,7 +209,7 @@ bool UniValue::pushKVs(const UniValue& obj) for (unsigned int i = 0; i < obj.keys.size(); i++) { keys.push_back(obj.keys[i]); - values.push_back(obj.values[i]); + values.push_back(obj.values.at(i)); } return true; @@ -234,7 +233,7 @@ bool UniValue::checkObject(const std::map& t) if (idx < 0) return false; - if (values[idx].getType() != it->second) + if (values.at(idx).getType() != it->second) return false; } @@ -250,7 +249,7 @@ const UniValue& UniValue::operator[](const std::string& key) const if (index < 0) return NullUniValue; - return values[index]; + return values.at(index); } const UniValue& UniValue::operator[](unsigned int index) const @@ -260,7 +259,7 @@ const UniValue& UniValue::operator[](unsigned int index) const if (index >= values.size()) return NullUniValue; - return values[index]; + return values.at(index); } const char *uvTypeName(UniValue::VType t) @@ -278,15 +277,11 @@ const char *uvTypeName(UniValue::VType t) return NULL; } -const UniValue& find_value( const UniValue& obj, const std::string& name) +const UniValue& find_value(const UniValue& obj, const std::string& name) { for (unsigned int i = 0; i < obj.keys.size(); i++) - { - if( obj.keys[i] == name ) - { - return obj.values[i]; - } - } + if (obj.keys[i] == name) + return obj.values.at(i); return NullUniValue; } diff --git a/lib/univalue_escapes.h b/lib/univalue_escapes.h index 4133b24ca..74596aab6 100644 --- a/lib/univalue_escapes.h +++ b/lib/univalue_escapes.h @@ -2,38 +2,38 @@ #ifndef BITCOIN_UNIVALUE_UNIVALUE_ESCAPES_H #define BITCOIN_UNIVALUE_UNIVALUE_ESCAPES_H static const char *escapes[256] = { - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, + "\\u0000", + "\\u0001", + "\\u0002", + "\\u0003", + "\\u0004", + "\\u0005", + "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", - NULL, + "\\u000b", "\\f", "\\r", - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, + "\\u000e", + "\\u000f", + "\\u0010", + "\\u0011", + "\\u0012", + "\\u0013", + "\\u0014", + "\\u0015", + "\\u0016", + "\\u0017", + "\\u0018", + "\\u0019", + "\\u001a", + "\\u001b", + "\\u001c", + "\\u001d", + "\\u001e", + "\\u001f", NULL, NULL, "\\\"", @@ -129,7 +129,7 @@ static const char *escapes[256] = { NULL, NULL, NULL, - NULL, + "\\u007f", NULL, NULL, NULL, diff --git a/lib/univalue_read.cpp b/lib/univalue_read.cpp index 64591234c..c7516b962 100644 --- a/lib/univalue_read.cpp +++ b/lib/univalue_read.cpp @@ -9,6 +9,11 @@ using namespace std; +static bool json_isdigit(int ch) +{ + return ((ch >= '0') && (ch <= '9')); +} + // convert hexadecimal string to unsigned integer static const char *hatoui(const char *first, const char *last, unsigned int& out) @@ -17,7 +22,7 @@ static const char *hatoui(const char *first, const char *last, for (; first != last; ++first) { int digit; - if (isdigit(*first)) + if (json_isdigit(*first)) digit = *first - '0'; else if (*first >= 'a' && *first <= 'f') @@ -44,7 +49,7 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed, const char *rawStart = raw; - while ((*raw) && (isspace(*raw))) // skip whitespace + while ((*raw) && (json_isspace(*raw))) // skip whitespace raw++; switch (*raw) { @@ -113,18 +118,18 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed, const char *first = raw; const char *firstDigit = first; - if (!isdigit(*firstDigit)) + if (!json_isdigit(*firstDigit)) firstDigit++; - if ((*firstDigit == '0') && isdigit(firstDigit[1])) + if ((*firstDigit == '0') && json_isdigit(firstDigit[1])) return JTOK_ERR; numStr += *raw; // copy first char raw++; - if ((*first == '-') && (!isdigit(*raw))) + if ((*first == '-') && (!json_isdigit(*raw))) return JTOK_ERR; - while ((*raw) && isdigit(*raw)) { // copy digits + while ((*raw) && json_isdigit(*raw)) { // copy digits numStr += *raw; raw++; } @@ -134,9 +139,9 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed, numStr += *raw; // copy . raw++; - if (!isdigit(*raw)) + if (!json_isdigit(*raw)) return JTOK_ERR; - while ((*raw) && isdigit(*raw)) { // copy digits + while ((*raw) && json_isdigit(*raw)) { // copy digits numStr += *raw; raw++; } @@ -152,9 +157,9 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed, raw++; } - if (!isdigit(*raw)) + if (!json_isdigit(*raw)) return JTOK_ERR; - while ((*raw) && isdigit(*raw)) { // copy digits + while ((*raw) && json_isdigit(*raw)) { // copy digits numStr += *raw; raw++; } @@ -236,12 +241,23 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed, } } +enum expect_bits { + EXP_OBJ_NAME = (1U << 0), + EXP_COLON = (1U << 1), + EXP_ARR_VALUE = (1U << 2), + EXP_VALUE = (1U << 3), + EXP_NOT_VALUE = (1U << 4), +}; + +#define expect(bit) (expectMask & (EXP_##bit)) +#define setExpect(bit) (expectMask |= EXP_##bit) +#define clearExpect(bit) (expectMask &= ~EXP_##bit) + bool UniValue::read(const char *raw) { clear(); - bool expectName = false; - bool expectColon = false; + uint32_t expectMask = 0; vector stack; string tokenVal; @@ -256,6 +272,41 @@ bool UniValue::read(const char *raw) return false; raw += consumed; + bool isValueOpen = jsonTokenIsValue(tok) || + tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN; + + if (expect(VALUE)) { + if (!isValueOpen) + return false; + clearExpect(VALUE); + + } else if (expect(ARR_VALUE)) { + bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE); + if (!isArrValue) + return false; + + clearExpect(ARR_VALUE); + + } else if (expect(OBJ_NAME)) { + bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING); + if (!isObjName) + return false; + + } else if (expect(COLON)) { + if (tok != JTOK_COLON) + return false; + clearExpect(COLON); + + } else if (!expect(COLON) && (tok == JTOK_COLON)) { + return false; + } + + if (expect(NOT_VALUE)) { + if (isValueOpen) + return false; + clearExpect(NOT_VALUE); + } + switch (tok) { case JTOK_OBJ_OPEN: @@ -277,13 +328,15 @@ bool UniValue::read(const char *raw) } if (utyp == VOBJ) - expectName = true; + setExpect(OBJ_NAME); + else + setExpect(ARR_VALUE); break; } case JTOK_OBJ_CLOSE: case JTOK_ARR_CLOSE: { - if (!stack.size() || expectColon || (last_tok == JTOK_COMMA)) + if (!stack.size() || (last_tok == JTOK_COMMA)) return false; VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR); @@ -292,37 +345,40 @@ bool UniValue::read(const char *raw) return false; stack.pop_back(); - expectName = false; + clearExpect(OBJ_NAME); + setExpect(NOT_VALUE); break; } case JTOK_COLON: { - if (!stack.size() || expectName || !expectColon) + if (!stack.size()) return false; UniValue *top = stack.back(); if (top->getType() != VOBJ) return false; - expectColon = false; + setExpect(VALUE); break; } case JTOK_COMMA: { - if (!stack.size() || expectName || expectColon || + if (!stack.size() || (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN)) return false; UniValue *top = stack.back(); if (top->getType() == VOBJ) - expectName = true; + setExpect(OBJ_NAME); + else + setExpect(ARR_VALUE); break; } case JTOK_KW_NULL: case JTOK_KW_TRUE: case JTOK_KW_FALSE: { - if (!stack.size() || expectName || expectColon) + if (!stack.size()) return false; UniValue tmpVal; @@ -342,17 +398,19 @@ bool UniValue::read(const char *raw) UniValue *top = stack.back(); top->values.push_back(tmpVal); + setExpect(NOT_VALUE); break; } case JTOK_NUMBER: { - if (!stack.size() || expectName || expectColon) + if (!stack.size()) return false; UniValue tmpVal(VNUM, tokenVal); UniValue *top = stack.back(); top->values.push_back(tmpVal); + setExpect(NOT_VALUE); break; } @@ -362,15 +420,16 @@ bool UniValue::read(const char *raw) UniValue *top = stack.back(); - if (expectName) { + if (expect(OBJ_NAME)) { top->keys.push_back(tokenVal); - expectName = false; - expectColon = true; + clearExpect(OBJ_NAME); + setExpect(COLON); } else { UniValue tmpVal(VSTR, tokenVal); top->values.push_back(tmpVal); } + setExpect(NOT_VALUE); break; } diff --git a/lib/univalue_write.cpp b/lib/univalue_write.cpp index bce3997af..ceb4cc916 100644 --- a/lib/univalue_write.cpp +++ b/lib/univalue_write.cpp @@ -2,7 +2,6 @@ // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. -#include #include #include #include @@ -25,10 +24,10 @@ static string json_escape(const string& inS) if (escStr) outS += escStr; - else if (isprint(ch)) + else if (ch < 0x80) outS += ch; - else { + else { // TODO handle UTF-8 properly char tmpesc[16]; sprintf(tmpesc, "\\u%04x", ch); outS += tmpesc; @@ -113,7 +112,7 @@ void UniValue::writeObject(unsigned int prettyIndent, unsigned int indentLevel, s += "\"" + json_escape(keys[i]) + "\":"; if (prettyIndent) s += " "; - s += values[i].write(prettyIndent, indentLevel + 1); + s += values.at(i).write(prettyIndent, indentLevel + 1); if (i != (values.size() - 1)) s += ","; if (prettyIndent) diff --git a/test/.gitignore b/test/.gitignore index 4afa094b1..3d9347fe7 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -1 +1,4 @@ unitester + +*.trs +*.log diff --git a/test/fail35.json b/test/fail35.json new file mode 100644 index 000000000..de30ca5c4 --- /dev/null +++ b/test/fail35.json @@ -0,0 +1 @@ +[ true true true [] [] [] ] diff --git a/test/fail36.json b/test/fail36.json new file mode 100644 index 000000000..f82eb8e1f --- /dev/null +++ b/test/fail36.json @@ -0,0 +1 @@ +{"a":} diff --git a/test/fail37.json b/test/fail37.json new file mode 100644 index 000000000..3294dc3a4 --- /dev/null +++ b/test/fail37.json @@ -0,0 +1 @@ +{"a":1 "b":2} diff --git a/test/round1.json b/test/round1.json new file mode 100644 index 000000000..a711e7308 --- /dev/null +++ b/test/round1.json @@ -0,0 +1 @@ +["\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f"] diff --git a/test/unitester.cpp b/test/unitester.cpp index 835556e03..5a052fe92 100644 --- a/test/unitester.cpp +++ b/test/unitester.cpp @@ -19,24 +19,37 @@ using namespace std; string srcdir(JSON_TEST_SRC); +static bool test_failed = false; + +#define d_assert(expr) { if (!(expr)) { test_failed = true; fprintf(stderr, "%s failed\n", filename.c_str()); } } + +static std::string rtrim(std::string s) +{ + s.erase(s.find_last_not_of(" \n\r\t")+1); + return s; +} static void runtest(string filename, const string& jdata) { - fprintf(stderr, "test %s\n", filename.c_str()); - string prefix = filename.substr(0, 4); - bool wantPass = (prefix == "pass"); + bool wantPass = (prefix == "pass") || (prefix == "roun"); bool wantFail = (prefix == "fail"); + bool wantRoundTrip = (prefix == "roun"); assert(wantPass || wantFail); UniValue val; bool testResult = val.read(jdata); if (wantPass) { - assert(testResult == true); + d_assert(testResult == true); } else { - assert(testResult == false); + d_assert(testResult == false); + } + + if (wantRoundTrip) { + std::string odata = val.write(0, 0); + assert(odata == rtrim(jdata)); } } @@ -92,6 +105,9 @@ static const char *filenames[] = { "fail32.json", "fail33.json", "fail34.json", + "fail35.json", + "fail36.json", + "fail37.json", "fail3.json", "fail4.json", // extra comma "fail5.json", @@ -102,6 +118,7 @@ static const char *filenames[] = { "pass1.json", "pass2.json", "pass3.json", + "round1.json", // round-trip test }; int main (int argc, char *argv[]) @@ -110,6 +127,6 @@ int main (int argc, char *argv[]) runtest_file(filenames[fidx]); } - return 0; + return test_failed ? 1 : 0; }