11"use strict" ;
22
3- var fs = require ( "fs " ) ,
4- assert = require ( "assert " ) ,
5- Buffer = require ( "safer-buffer" ) . Buffer ,
6- iconv = require ( "../" ) ;
3+ const assert = require ( "assert " ) ,
4+ utils = require ( "./utils " ) ,
5+ fixtures = require ( "./fixtures/gbk-big5.json" ) ,
6+ iconv = utils . requireIconv ( ) ;
77
8- var testString = "中国abc" , //unicode contains GBK-code and ascii
9- testStringGBKBuffer = Buffer . from ( [ 0xd6 , 0xd0 , 0xb9 , 0xfa , 0x61 , 0x62 , 0x63 ] ) ;
8+ const testString = "中国abc" , //unicode contains GBK-code and ascii
9+ testStringGBKBuffer = utils . bytes ( "d6 d0 b9 fa 61 62 63" ) ;
1010
11- describe ( "GBK tests" , function ( ) {
11+ describe ( "GBK tests #node-web " , function ( ) {
1212 it ( "GBK correctly encoded/decoded" , function ( ) {
1313 assert . strictEqual (
14- iconv . encode ( testString , "GBK" ) . toString ( "binary" ) ,
15- testStringGBKBuffer . toString ( "binary" )
14+ utils . hex ( iconv . encode ( testString , "GBK" ) ) ,
15+ utils . hex ( testStringGBKBuffer )
1616 ) ;
1717 assert . strictEqual ( iconv . decode ( testStringGBKBuffer , "GBK" ) , testString ) ;
1818 } ) ;
1919
2020 it ( "GB2312 correctly encoded/decoded" , function ( ) {
2121 assert . strictEqual (
22- iconv . encode ( testString , "GB2312" ) . toString ( "binary" ) ,
23- testStringGBKBuffer . toString ( "binary" )
22+ utils . hex ( iconv . encode ( testString , "GB2312" ) ) ,
23+ utils . hex ( testStringGBKBuffer )
2424 ) ;
2525 assert . strictEqual ( iconv . decode ( testStringGBKBuffer , "GB2312" ) , testString ) ;
2626 } ) ;
2727
2828 it ( "GBK file read decoded,compare with iconv result" , function ( ) {
29- var contentBuffer = fs . readFileSync ( __dirname + "/gbkFile.txt" ) ;
30- var str = iconv . decode ( contentBuffer , "GBK" ) ;
31- var iconvc = new ( require ( "iconv" ) . Iconv ) ( "GBK" , "utf8" ) ;
32- assert . strictEqual ( iconvc . convert ( contentBuffer ) . toString ( ) , str ) ;
29+ const contentBuffer = utils . bytes ( fixtures . gbk . bytes ) ;
30+ const str = iconv . decode ( contentBuffer , "GBK" ) ;
31+ assert . strictEqual ( fixtures . gbk . string , str ) ;
3332 } ) ;
3433
3534 it ( "GBK correctly decodes and encodes characters · and ×" , function ( ) {
3635 // https://github.com/ashtuchkin/iconv-lite/issues/13
3736 // Reference: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT
38- var chars = "·×" ;
39- var gbkChars = Buffer . from ( [ 0xa1 , 0xa4 , 0xa1 , 0xc1 ] ) ;
40- assert . strictEqual (
41- iconv . encode ( chars , "GBK" ) . toString ( "binary" ) ,
42- gbkChars . toString ( "binary" )
43- ) ;
37+ const chars = "·×" ;
38+ const gbkChars = utils . bytes ( "a1 a4 a1 c1" ) ;
39+ assert . strictEqual ( utils . hex ( iconv . encode ( chars , "GBK" ) ) , utils . hex ( gbkChars ) ) ;
4440 assert . strictEqual ( iconv . decode ( gbkChars , "GBK" ) , chars ) ;
4541 } ) ;
4642
4743 it ( "GBK and GB18030 correctly decodes and encodes Euro character" , function ( ) {
4844 // Euro character (U+20AC) has two encodings in GBK family: 0x80 and 0xA2 0xE3
4945 // According to W3C's technical recommendation (https://www.w3.org/TR/encoding/#gbk-encoder),
5046 // both the GBK and GB18030 decoders should accept both encodings.
51- var gbkEuroEncoding1 = Buffer . from ( [ 0x80 ] ) ,
52- gbkEuroEncoding2 = Buffer . from ( [ 0xa2 , 0xe3 ] ) ,
47+ const gbkEuroEncoding1 = utils . bytes ( "80" ) ,
48+ gbkEuroEncoding2 = utils . bytes ( "a2 e3" ) ,
5349 strEuro = "€" ;
5450
5551 assert . strictEqual ( iconv . decode ( gbkEuroEncoding1 , "GBK" ) , strEuro ) ;
@@ -58,13 +54,10 @@ describe("GBK tests", function () {
5854 assert . strictEqual ( iconv . decode ( gbkEuroEncoding2 , "GB18030" ) , strEuro ) ;
5955
6056 // But when encoding, GBK should produce 0x80, while GB18030 should produce 0xA2 0xE3.
57+ assert . strictEqual ( utils . hex ( iconv . encode ( strEuro , "GBK" ) ) , utils . hex ( gbkEuroEncoding1 ) ) ;
6158 assert . strictEqual (
62- iconv . encode ( strEuro , "GBK" ) . toString ( "hex" ) ,
63- gbkEuroEncoding1 . toString ( "hex" )
64- ) ;
65- assert . strictEqual (
66- iconv . encode ( strEuro , "GB18030" ) . toString ( "hex" ) ,
67- gbkEuroEncoding2 . toString ( "hex" )
59+ utils . hex ( iconv . encode ( strEuro , "GB18030" ) ) ,
60+ utils . hex ( gbkEuroEncoding2 )
6861 ) ;
6962 } ) ;
7063
@@ -92,65 +85,54 @@ describe("GBK tests", function () {
9285 ) ;
9386 } ) ;
9487
95- function swapBytes ( buf ) {
96- for ( var i = 0 ; i < buf . length ; i += 2 ) buf . writeUInt16LE ( buf . readUInt16BE ( i ) , i ) ;
97- return buf ;
98- }
99- function spacify4 ( str ) {
100- return str . replace ( / ( ....) / g, "$1 " ) . trim ( ) ;
101- }
102- function strToHex ( str ) {
103- return spacify4 ( swapBytes ( Buffer . from ( str , "ucs2" ) ) . toString ( "hex" ) ) ;
104- }
105-
10688 it ( "GB18030 encodes/decodes 4 byte sequences" , function ( ) {
107- var chars = {
108- "\u0080" : Buffer . from ( [ 0x81 , 0x30 , 0x81 , 0x30 ] ) ,
109- "\u0081" : Buffer . from ( [ 0x81 , 0x30 , 0x81 , 0x31 ] ) ,
110- "\u008b" : Buffer . from ( [ 0x81 , 0x30 , 0x82 , 0x31 ] ) ,
111- "\u0615" : Buffer . from ( [ 0x81 , 0x31 , 0x82 , 0x31 ] ) ,
112- 㦟 : Buffer . from ( [ 0x82 , 0x31 , 0x82 , 0x31 ] ) ,
113- "\udbd9\ude77" : Buffer . from ( [ 0xe0 , 0x31 , 0x82 , 0x31 ] ) ,
89+ const chars = {
90+ "\u0080" : utils . bytes ( "81 30 81 30" ) ,
91+ "\u0081" : utils . bytes ( "81 30 81 31" ) ,
92+ "\u008b" : utils . bytes ( "81 30 82 31" ) ,
93+ "\u0615" : utils . bytes ( "81 31 82 31" ) ,
94+ 㦟 : utils . bytes ( "82 31 82 31" ) ,
95+ "\udbd9\ude77" : utils . bytes ( "e0 31 82 31" ) ,
11496 } ;
115- for ( var uChar in chars ) {
116- var gbkBuf = chars [ uChar ] ;
97+ for ( const uChar in chars ) {
98+ const gbkBuf = chars [ uChar ] ;
99+ assert . strictEqual ( utils . hex ( iconv . encode ( uChar , "GB18030" ) ) , utils . hex ( gbkBuf ) ) ;
117100 assert . strictEqual (
118- iconv . encode ( uChar , "GB18030" ) . toString ( "hex" ) ,
119- gbkBuf . toString ( "hex" )
101+ utils . strToHex ( iconv . decode ( gbkBuf , "GB18030" ) ) ,
102+ utils . strToHex ( uChar )
120103 ) ;
121- assert . strictEqual ( strToHex ( iconv . decode ( gbkBuf , "GB18030" ) ) , strToHex ( uChar ) ) ;
122104 }
123105 } ) ;
124106
125107 it ( "GB18030 correctly decodes incomplete 4 byte sequences" , function ( ) {
126- var chars = {
127- "�" : Buffer . from ( [ 0x82 ] ) ,
128- "�1" : Buffer . from ( [ 0x82 , 0x31 ] ) ,
129- "�1�" : Buffer . from ( [ 0x82 , 0x31 , 0x82 ] ) ,
130- 㦟 : Buffer . from ( [ 0x82 , 0x31 , 0x82 , 0x31 ] ) ,
131- "� " : Buffer . from ( [ 0x82 , 0x20 ] ) ,
132- "�1 " : Buffer . from ( [ 0x82 , 0x31 , 0x20 ] ) ,
133- "�1� " : Buffer . from ( [ 0x82 , 0x31 , 0x82 , 0x20 ] ) ,
134- "\u399f " : Buffer . from ( [ 0x82 , 0x31 , 0x82 , 0x31 , 0x20 ] ) ,
135- "�1\u4fdb" : Buffer . from ( [ 0x82 , 0x31 , 0x82 , 0x61 ] ) ,
136- "�1\u5010\u0061" : Buffer . from ( [ 0x82 , 0x31 , 0x82 , 0x82 , 0x61 ] ) ,
137- 㦟俛 : Buffer . from ( [ 0x82 , 0x31 , 0x82 , 0x31 , 0x82 , 0x61 ] ) ,
138- "�1\u50101�1" : Buffer . from ( [ 0x82 , 0x31 , 0x82 , 0x82 , 0x31 , 0x82 , 0x31 ] ) ,
108+ const chars = {
109+ "�" : utils . bytes ( "82" ) ,
110+ "�1" : utils . bytes ( "82 31" ) ,
111+ "�1�" : utils . bytes ( "82 31 82" ) ,
112+ 㦟 : utils . bytes ( "82 31 82 31" ) ,
113+ "� " : utils . bytes ( "82 20" ) ,
114+ "�1 " : utils . bytes ( "82 31 20" ) ,
115+ "�1� " : utils . bytes ( "82 31 82 20" ) ,
116+ "\u399f " : utils . bytes ( "82 31 82 31 20" ) ,
117+ "�1\u4fdb" : utils . bytes ( "82 31 82 61" ) ,
118+ "�1\u5010\u0061" : utils . bytes ( "82 31 82 82 61" ) ,
119+ 㦟俛 : utils . bytes ( "82 31 82 31 82 61" ) ,
120+ "�1\u50101�1" : utils . bytes ( "82 31 82 82 31 82 31" ) ,
139121 } ;
140- for ( var uChar in chars ) {
141- var gbkBuf = chars [ uChar ] ;
142- assert . strictEqual ( strToHex ( iconv . decode ( gbkBuf , "GB18030" ) ) , strToHex ( uChar ) ) ;
122+ for ( const uChar in chars ) {
123+ const gbkBuf = chars [ uChar ] ;
124+ assert . strictEqual (
125+ utils . strToHex ( iconv . decode ( gbkBuf , "GB18030" ) ) ,
126+ utils . strToHex ( uChar )
127+ ) ;
143128 }
144129 } ) ;
145130
146131 it ( "GB18030:2005 changes are applied" , function ( ) {
147132 // See https://github.com/whatwg/encoding/issues/22
148- var chars = "\u1E3F\u0000\uE7C7" ; // Use \u0000 as separator
149- var gbkChars = Buffer . from ( [ 0xa8 , 0xbc , 0x00 , 0x81 , 0x35 , 0xf4 , 0x37 ] ) ;
133+ const chars = "\u1E3F\u0000\uE7C7" ; // Use \u0000 as separator
134+ const gbkChars = utils . bytes ( "a8 bc 00 81 35 f4 37" ) ;
150135 assert . strictEqual ( iconv . decode ( gbkChars , "GB18030" ) , chars ) ;
151- assert . strictEqual (
152- iconv . encode ( chars , "GB18030" ) . toString ( "hex" ) ,
153- gbkChars . toString ( "hex" )
154- ) ;
136+ assert . strictEqual ( utils . hex ( iconv . encode ( chars , "GB18030" ) ) , utils . hex ( gbkChars ) ) ;
155137 } ) ;
156138} ) ;
0 commit comments