From ba886fad824cab6e187c0ea7c8b02096eb5d7cb4 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Tue, 12 Dec 2017 12:49:13 +0100 Subject: [PATCH] Add option to serialize UTF-8 strings directly The JSON serializer assumes that std::string and char* values are encoded as Latin1, while cxxtools::String values are Unicode strings (using characters from the BMP). This is impractical when serializing UTF-8 data. Instead of converting from UTF-8 to cxxtools::String and then using the \uXXXX notation, add a flag inputUtf8 to tell the serializer that std::string and char* values are UTF-8 encoded and can be inserted into the UTF-8 JSON verbatim. --- include/cxxtools/json.h | 15 ++++++++++++++- include/cxxtools/jsonformatter.h | 11 +++++++++-- include/cxxtools/jsonserializer.h | 4 ++++ src/jsonformatter.cpp | 3 ++- test/jsonserializer-test.cpp | 10 ++++++++++ 5 files changed, 39 insertions(+), 4 deletions(-) diff --git a/include/cxxtools/json.h b/include/cxxtools/json.h index f2592988..13a30895 100644 --- a/include/cxxtools/json.h +++ b/include/cxxtools/json.h @@ -58,6 +58,7 @@ namespace cxxtools const ObjectType& _constObject; bool _beautify; bool _plainkey; + bool _inputUtf8; public: /// Constructor. Needs the wrapped object. Optionally a flag can be @@ -65,7 +66,8 @@ namespace cxxtools explicit JsonOObject(const ObjectType& object, bool beautify = false) : _constObject(object), _beautify(beautify), - _plainkey(false) + _plainkey(false), + _inputUtf8(false) { } /// Sets the formatting for json. If the passed flag is true, enables @@ -83,6 +85,16 @@ namespace cxxtools bool plainkey() const { return _plainkey; } + // Tells the serializer that std::string and char* on input are + // UTF-8 encoded and do not need to be encoded further. By default, + // they are assumed to be Latin1 encoded. 
cxxtools::String is a wide + // string, so it's not affected by this setting. + JsonOObject& inputUtf8(bool sw) + { _inputUtf8 = sw; return *this; } + + bool inputUtf8() const + { return _inputUtf8; } + const ObjectType& object() const { return _constObject; } }; @@ -96,6 +108,7 @@ namespace cxxtools JsonSerializer serializer(out); serializer.beautify(object.beautify()); serializer.plainkey(object.plainkey()); + serializer.inputUtf8(object.inputUtf8()); serializer.serialize(object.object()) .finish(); } diff --git a/include/cxxtools/jsonformatter.h b/include/cxxtools/jsonformatter.h index bfb89155..21eacd33 100644 --- a/include/cxxtools/jsonformatter.h +++ b/include/cxxtools/jsonformatter.h @@ -42,7 +42,8 @@ namespace cxxtools _level(1), _lastLevel(0), _beautify(false), - _plainkey(false) + _plainkey(false), + _inputUtf8(false) { } @@ -51,7 +52,8 @@ namespace cxxtools _level(1), _lastLevel(0), _beautify(false), - _plainkey(false) + _plainkey(false), + _inputUtf8(false) { begin(out); } @@ -106,6 +108,10 @@ namespace cxxtools void plainkey(bool sw) { _plainkey = sw; } + bool inputUtf8() const { return _inputUtf8; } + + void inputUtf8(bool sw) { _inputUtf8 = sw; } + void beginValue(const std::string& name); void finishValue(); @@ -120,6 +126,7 @@ namespace cxxtools unsigned _lastLevel; bool _beautify; bool _plainkey; + bool _inputUtf8; }; } diff --git a/include/cxxtools/jsonserializer.h b/include/cxxtools/jsonserializer.h index 090bbd1a..eb6709f0 100644 --- a/include/cxxtools/jsonserializer.h +++ b/include/cxxtools/jsonserializer.h @@ -184,6 +184,10 @@ namespace cxxtools void plainkey(bool sw) { _formatter.plainkey(sw); } + bool inputUtf8() const { return _formatter.inputUtf8(); } + + void inputUtf8(bool sw) { _formatter.inputUtf8(sw); } + template <typename T> static std::string toString(const T& type, const std::string& name, bool beautify = false) { diff --git a/src/jsonformatter.cpp b/src/jsonformatter.cpp index 06cce3db..ba1449f6 100644 --- a/src/jsonformatter.cpp +++ 
b/src/jsonformatter.cpp @@ -399,7 +399,8 @@ void JsonFormatter::stringOut(const std::string& str) *_os << "\\r"; else if (*it == '\t') *_os << "\\t"; - else if (static_cast<unsigned char>(*it) >= 0x80 || static_cast<unsigned char>(*it) < 0x20) + else if ((!_inputUtf8 && static_cast<unsigned char>(*it) >= 0x80) || + static_cast<unsigned char>(*it) < 0x20) { *_os << "\\u"; static const char hex[] = "0123456789abcdef"; diff --git a/test/jsonserializer-test.cpp b/test/jsonserializer-test.cpp index 68fff9d8..e811b1c6 100644 --- a/test/jsonserializer-test.cpp +++ b/test/jsonserializer-test.cpp @@ -111,6 +111,7 @@ class JsonSerializerTest : public cxxtools::unit::TestSuite registerMethod("testDirect", *this, &JsonSerializerTest::testDirect); registerMethod("testEasyJson", *this, &JsonSerializerTest::testEasyJson); registerMethod("testPlainkey", *this, &JsonSerializerTest::testPlainkey); + registerMethod("testInputUtf8", *this, &JsonSerializerTest::testInputUtf8); } void testInt() @@ -341,6 +342,15 @@ class JsonSerializerTest : public cxxtools::unit::TestSuite "ddd:4}"); } } + + void testInputUtf8() + { + std::string str("Euro sign: \342\202\254"); + std::ostringstream out; + out << cxxtools::Json(str).inputUtf8(true); + CXXTOOLS_UNIT_ASSERT_EQUALS(out.str(), "\"" + str + "\""); + } + }; cxxtools::unit::RegisterTest<JsonSerializerTest> register_JsonSerializerTest;