Backport of the fix found here: https://github.com/joyent/node/commit/78b0e30954111cfaba0edbeee85450d8cbc6fdf6 Note: this patch is modified to use ASSERT instead of DCHECK because this version of V8 predates the ASSERT-to-DCHECK rename, which happened here: https://codereview.chromium.org/430503007 --- src/unicode-inl.h.orig 2013-05-01 12:56:29 UTC +++ src/unicode-inl.h @@ -168,6 +168,7 @@ unsigned Utf8::Length(uchar c, int previ Utf8DecoderBase::Utf8DecoderBase() : unbuffered_start_(NULL), + unbuffered_length_(0), utf16_length_(0), last_byte_of_buffer_unused_(false) {} @@ -207,8 +208,7 @@ unsigned Utf8Decoder::Write if (length <= buffer_length) return length; ASSERT(unbuffered_start_ != NULL); // Copy the rest the slow way. - WriteUtf16Slow(unbuffered_start_, - data + buffer_length, + WriteUtf16Slow(unbuffered_start_, unbuffered_length_, data + buffer_length, length - buffer_length); return length; } --- src/unicode.cc.orig 2013-05-01 12:56:29 UTC +++ src/unicode.cc @@ -284,6 +284,7 @@ void Utf8DecoderBase::Reset(uint16_t* bu // Assume everything will fit in the buffer and stream won't be needed. last_byte_of_buffer_unused_ = false; unbuffered_start_ = NULL; + unbuffered_length_ = 0; bool writing_to_buffer = true; // Loop until stream is read, writing to buffer as long as buffer has space. 
unsigned utf16_length = 0; @@ -310,6 +311,7 @@ void Utf8DecoderBase::Reset(uint16_t* bu // Just wrote last character of buffer writing_to_buffer = false; unbuffered_start_ = stream; + unbuffered_length_ = stream_length; } continue; } @@ -319,20 +321,24 @@ void Utf8DecoderBase::Reset(uint16_t* bu writing_to_buffer = false; last_byte_of_buffer_unused_ = true; unbuffered_start_ = stream - cursor; + unbuffered_length_ = stream_length + cursor; } utf16_length_ = utf16_length; } void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, + unsigned stream_length, uint16_t* data, unsigned data_length) { while (data_length != 0) { unsigned cursor = 0; - uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor); + + uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); // There's a total lack of bounds checking for stream // as it was already done in Reset. stream += cursor; + stream_length -= cursor; if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { *data++ = Utf16::LeadSurrogate(character); *data++ = Utf16::TrailSurrogate(character); @@ -343,6 +349,7 @@ void Utf8DecoderBase::WriteUtf16Slow(con data_length -= 1; } } + ASSERT(stream_length >= 0); } --- src/unicode.h.orig 2013-05-01 12:56:29 UTC +++ src/unicode.h @@ -184,10 +184,10 @@ class Utf8DecoderBase { unsigned buffer_length, const uint8_t* stream, unsigned stream_length); - static void WriteUtf16Slow(const uint8_t* stream, - uint16_t* data, - unsigned length); + static void WriteUtf16Slow(const uint8_t* stream, unsigned stream_length, + uint16_t* data, unsigned length); const uint8_t* unbuffered_start_; + unsigned unbuffered_length_; unsigned utf16_length_; bool last_byte_of_buffer_unused_; private: