11"use strict" ;
22
3- // Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
// == UTF16-LE codec. ==========================================================
// Note: We're not using the Node.js native codec because its StringDecoder implementation is buggy
// (it adds \0 in some chunks and doesn't flag an odd number of bytes). We do use raw encoding/decoding
// routines for performance, though.
7+
8+ exports . utf16le = class Utf16LECodec {
9+ createEncoder ( options , iconv ) {
10+ return new Utf16LEEncoder ( iconv . backend ) ;
11+ }
12+ createDecoder ( options , iconv ) {
13+ return new Utf16LEDecoder ( iconv . backend , iconv . defaultCharUnicode ) ;
14+ }
15+ get bomAware ( ) { return true ; }
16+ }
17+
/**
 * Streaming UTF-16LE encoder: turns each JS string chunk into two bytes per
 * UTF-16 code unit, low byte first.
 */
class Utf16LEEncoder {
    /**
     * @param {object} backend - Must provide `allocBytes(n)` and `bytesToResult(bytes, length)`.
     */
    constructor(backend) {
        this.backend = backend;
    }

    /**
     * Encodes one chunk.
     * @param {string} str - Text to encode; surrogate halves are written as-is.
     * @returns {*} Whatever `backend.bytesToResult` returns for the filled byte array.
     */
    write(str) {
        const bytes = this.backend.allocBytes(str.length * 2);

        // Store bytes explicitly instead of aliasing the buffer with a Uint16Array:
        // a Uint16Array view throws RangeError when bytes.byteOffset is odd and
        // writes in host byte order, whereas this is always little-endian.
        let bytePos = 0;
        for (let i = 0; i < str.length; i++) {
            const codeUnit = str.charCodeAt(i);
            bytes[bytePos++] = codeUnit & 0xFF;
            bytes[bytePos++] = codeUnit >>> 8;
        }
        return this.backend.bytesToResult(bytes, bytes.length);
    }

    /** Nothing is buffered between writes, so there is nothing to flush. */
    end() {}
}
34+
/**
 * Streaming UTF-16LE decoder.
 *
 * State carried between write() calls:
 *  - overflowByte: the dangling low byte when a chunk ends mid code unit (-1 if none);
 *  - prefixSurrogate: a high surrogate held back from the end of the previous
 *    chunk so that it is emitted together with what follows it (undefined if none).
 */
class Utf16LEDecoder {
    /**
     * @param {object} backend - Must provide `allocRawChars(n)` and `rawCharsToResult(chars, length)`.
     * @param {string} defaultChar - Emitted by end() in place of a dangling odd byte.
     */
    constructor(backend, defaultChar) {
        this.backend = backend;
        this.defaultChar = defaultChar;
        this.overflowByte = -1;
        this.prefixSurrogate = undefined;
    }

    /**
     * Decodes one chunk of bytes.
     * @param {Uint8Array} buf - Byte view (reads `length`, `byteOffset`, `buffer` and indexed bytes).
     * @returns {string} Decoded text; may be empty if everything was held back.
     */
    write(buf) {
        if (buf.length === 0) {
            return '';
        }
        let byteOffset = buf.byteOffset;
        let byteLen = buf.length;

        // Complete the code unit that was split across the previous chunk boundary.
        let prefix = '';
        if (this.overflowByte !== -1) {
            byteOffset++;
            byteLen--;
            prefix = String.fromCharCode(this.overflowByte + (buf[0] << 8));
        }

        // Hold back a trailing odd byte for the next chunk.
        if (byteLen & 1) {
            this.overflowByte = buf[buf.length - 1];
            byteLen--;
        } else {
            this.overflowByte = -1;
        }

        // NOTE(review): reinterpreting bytes as Uint16Array uses host byte order,
        // so both branches assume a little-endian host — confirm this is acceptable.
        let chars;
        if ((byteOffset & 1) === 0) {
            // Parentheses are required: `byteOffset & 1 === 0` parses as
            // `byteOffset & (1 === 0)`, which is always 0 and disabled this branch.
            // Aligned input: view the caller's ArrayBuffer directly, no copy.
            chars = new Uint16Array(buf.buffer, byteOffset, byteLen >> 1);
        } else {
            // Unaligned input: a Uint16Array cannot start at an odd offset, so copy
            // the bytes into a freshly allocated (aligned) array.
            chars = this.backend.allocRawChars(byteLen >> 1);
            const srcByteView = new Uint8Array(buf.buffer, byteOffset, byteLen);
            const destByteView = new Uint8Array(chars.buffer, chars.byteOffset, byteLen);
            destByteView.set(srcByteView);
        }

        let res = prefix + this.backend.rawCharsToResult(chars, chars.length);
        if (res) {
            // Add high surrogate from previous chunk.
            if (this.prefixSurrogate) {
                res = this.prefixSurrogate + res;
                this.prefixSurrogate = undefined;
            }

            // Slice off a new high surrogate at the end of the current chunk.
            const lastChar = res.charCodeAt(res.length - 1);
            if (0xD800 <= lastChar && lastChar < 0xDC00) {
                this.prefixSurrogate = res[res.length - 1];
                res = res.slice(0, -1);
            }
        }
        return res;
    }

    /**
     * Flushes pending state and resets it.
     * @returns {string|undefined} The held-back high surrogate (if any) followed by
     *   `defaultChar` for a dangling odd byte (if any); undefined when nothing is pending.
     */
    end() {
        if (this.prefixSurrogate || this.overflowByte !== -1) {
            const res = (this.prefixSurrogate ? this.prefixSurrogate : '')
                + (this.overflowByte !== -1 ? this.defaultChar : '');
            this.prefixSurrogate = undefined;
            this.overflowByte = -1;
            return res;
        }
    }
}
// Alias: a string value naming another exported encoding.
// NOTE(review): presumably the codec lookup follows string values to the named codec — confirm.
exports.ucs2 = "utf16le";
105+
4106
5107// == UTF16-BE codec. ==========================================================
6108
7109exports . utf16be = class Utf16BECodec {
8- get encoder ( ) { return Utf16BEEncoder ; }
9- get decoder ( ) { return Utf16BEDecoder ; }
110+ createEncoder ( options , iconv ) {
111+ return new Utf16BEEncoder ( iconv . backend ) ;
112+ }
113+ createDecoder ( options , iconv ) {
114+ return new Utf16BEDecoder ( iconv . backend , iconv . defaultCharUnicode ) ;
115+ }
10116 get bomAware ( ) { return true ; }
11117}
12118
13119class Utf16BEEncoder {
14- constructor ( opts , codec , backend ) {
120+ constructor ( backend ) {
15121 this . backend = backend ;
16122 }
17123
@@ -30,30 +136,59 @@ class Utf16BEEncoder {
30136}
31137
/**
 * Streaming UTF-16BE decoder.
 *
 * State carried between write() calls:
 *  - overflowByte: the dangling high byte when a chunk ends mid code unit (-1 if none);
 *  - prefixSurrogate: a high surrogate held back from the end of the previous
 *    chunk so that it is emitted together with what follows it (undefined if none).
 */
class Utf16BEDecoder {
    /**
     * @param {object} backend - Must provide `allocRawChars(n)` and `rawCharsToResult(chars, length)`.
     * @param {string} defaultChar - Emitted by end() in place of a dangling odd byte.
     */
    constructor(backend, defaultChar) {
        this.backend = backend;
        this.defaultChar = defaultChar;
        this.overflowByte = -1;
        this.prefixSurrogate = undefined;
    }

    /**
     * Decodes one chunk of bytes.
     * @param {Uint8Array} buf - Bytes to decode (reads `length` and indexed bytes).
     * @returns {string} Decoded text; may be empty if everything was held back.
     */
    write(buf) {
        if (buf.length === 0) {
            return '';
        }

        // +1 leaves room for the code unit completed from the previous chunk's overflow byte.
        const chars = this.backend.allocRawChars((buf.length + 1) >> 1);
        let charsPos = 0;
        let i = 0;

        // Complete the code unit that was split across the previous chunk boundary.
        if (this.overflowByte !== -1) {
            chars[charsPos++] = (this.overflowByte << 8) + buf[i++];
        }

        // NOTE: we can win another 10% perf by using chars[i >> 1].
        // NOTE: the double-reverse method takes almost the same time.
        for (; i < buf.length - 1; i += 2) {
            chars[charsPos++] = (buf[i] << 8) + buf[i + 1];
        }

        // Exactly one byte left over: hold it back for the next chunk.
        this.overflowByte = (i === buf.length - 1) ? buf[i] : -1;

        let res = this.backend.rawCharsToResult(chars, charsPos);
        if (res) {
            // Add high surrogate from previous chunk.
            if (this.prefixSurrogate) {
                res = this.prefixSurrogate + res;
                this.prefixSurrogate = undefined;
            }

            // Slice off a new high surrogate at the end of the current chunk.
            const lastChar = res.charCodeAt(res.length - 1);
            if (0xD800 <= lastChar && lastChar < 0xDC00) {
                this.prefixSurrogate = res[res.length - 1];
                res = res.slice(0, -1);
            }
        }
        return res;
    }

    /**
     * Flushes pending state and resets it.
     * @returns {string|undefined} The held-back high surrogate (if any) followed by
     *   `defaultChar` for a dangling odd byte (if any); undefined when nothing is pending.
     */
    end() {
        if (this.prefixSurrogate || this.overflowByte !== -1) {
            const res = (this.prefixSurrogate ? this.prefixSurrogate : '')
                + (this.overflowByte !== -1 ? this.defaultChar : '');
            this.prefixSurrogate = undefined;
            this.overflowByte = -1;
            return res;
        }
    }
}
59194
@@ -67,39 +202,25 @@ class Utf16BEDecoder {
67202// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
68203
69204exports . utf16 = class Utf16Codec {
70- constructor ( opts , iconv ) {
71- this . iconv = iconv ;
72- }
73- get encoder ( ) { return Utf16Encoder ; }
74- get decoder ( ) { return Utf16Decoder ; }
75- }
76-
77- class Utf16Encoder {
78- constructor ( options , codec ) {
205+ createEncoder ( options , iconv ) {
79206 options = options || { } ;
80207 if ( options . addBOM === undefined )
81208 options . addBOM = true ;
82- this . encoder = codec . iconv . getEncoder ( options . use || 'utf-16le' , options ) ;
209+ return iconv . getEncoder ( 'utf-16le' , options ) ;
83210 }
84-
85- // Pass-through to this.encoder
86- write ( str ) {
87- return this . encoder . write ( str ) ;
88- }
89-
90- end ( ) {
91- return this . encoder . end ( ) ;
211+ createDecoder ( options , iconv ) {
212+ return new Utf16Decoder ( options , iconv ) ;
92213 }
93214}
94215
95216class Utf16Decoder {
96- constructor ( options , codec ) {
217+ constructor ( options , iconv ) {
97218 this . decoder = null ;
98219 this . initialBufs = [ ] ;
99220 this . initialBufsLen = 0 ;
100221
101222 this . options = options || { } ;
102- this . iconv = codec . iconv ;
223+ this . iconv = iconv ;
103224 }
104225
105226 write ( buf ) {
0 commit comments