Skip to content

Commit e0370ea

Browse files
authored
issue 11: fix number parser (#16)
1 parent 099c6a8 commit e0370ea

File tree

4 files changed

+190
-149
lines changed

4 files changed

+190
-149
lines changed

parser.go

Lines changed: 48 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,6 @@ func newInfParser(t *Tokenizer, reader io.Reader, bufferSize uint) *parsing {
5959
}
6060
}
6161

62-
func (p *parsing) prev() {
63-
if p.pos > 0 {
64-
p.pos--
65-
p.curr = p.str[p.pos]
66-
}
67-
}
68-
6962
func (p *parsing) ensureBytes(n int) bool {
7063
if p.pos+n >= len(p.str) {
7164
if p.reader != nil {
@@ -273,66 +266,68 @@ func (p *parsing) parseKeyword() bool {
273266
return false
274267
}
275268

276-
const (
277-
stageCoefficient = iota + 1
278-
stageMantissa
279-
stagePower
280-
)
281-
282269
func (p *parsing) parseNumber() bool {
283270
var start = -1
284-
var needNumber = true
271+
var end = -1
272+
var floatTraitPos = -1
273+
var hasPoint = false
274+
var hasNumber = false
275+
var hasExp = false
285276

286-
var stage uint8 = 0
287277
for p.curr != 0 {
288278
if isNumberByte(p.curr) {
289-
needNumber = false
290279
if start == -1 {
291-
if stage == 0 {
292-
stage = stageCoefficient
293-
start = p.pos
294-
}
295-
}
296-
} else if p.t.allowNumberUnderscore && p.curr == '_' {
297-
if stage != stageCoefficient {
298-
break
299-
}
300-
// todo checks double underscore
301-
} else if !needNumber && p.curr == '.' {
302-
if stage != stageCoefficient {
303-
break
304-
}
305-
stage = stageMantissa
306-
needNumber = true
307-
} else if !needNumber && (p.curr == 'e' || p.curr == 'E') {
308-
if stage != stageMantissa && stage != stageCoefficient {
309-
break
310-
}
311-
ePowSign := false
312-
switch p.nextByte() {
313-
case '-', '+':
314-
ePowSign = true
315-
p.next()
280+
start = p.pos
316281
}
317-
needNumber = true
318-
if isNumberByte(p.nextByte()) {
319-
stage = stagePower
320-
} else {
321-
if ePowSign { // rollback sign position
322-
p.prev()
282+
end = p.pos
283+
hasNumber = true
284+
} else {
285+
nextByte := p.nextByte()
286+
if p.curr == '_' {
287+
if !hasNumber || (!p.t.allowNumberUnderscore || !isNumberByte(nextByte)) {
288+
break
289+
}
290+
} else if p.curr == '.' {
291+
if hasPoint {
292+
break
293+
} else if isNumberByte(nextByte) {
294+
if start == -1 { // a float may start with the decimal point, e.g. ".5"
295+
start = p.pos
296+
}
297+
} else if !(nextByte == 'e' || nextByte == 'E' || nextByte == 0) {
298+
break
299+
}
300+
floatTraitPos = p.pos
301+
end = p.pos
302+
hasPoint = true
303+
} else if p.curr == 'e' || p.curr == 'E' {
304+
if !hasNumber && !(isNumberByte(nextByte) || nextByte == '-') || hasExp {
305+
break
323306
}
307+
floatTraitPos = p.pos
308+
hasExp = true
309+
hasPoint = true
310+
} else if hasExp && (p.curr == '-' || p.curr == '+') {
311+
if isNumberByte(nextByte) {
312+
if start == -1 { // numbers can be started from a sign
313+
start = p.pos
314+
}
315+
} else {
316+
break
317+
}
318+
} else {
324319
break
325320
}
326-
} else {
327-
break
328321
}
329322
p.next()
330323
}
331-
if stage == 0 {
324+
if start == -1 {
332325
return false
333326
}
334-
p.token.value = p.str[start:p.pos]
335-
if stage == stageCoefficient {
327+
end = end + 1
328+
p.pos = end
329+
p.token.value = p.str[start:end]
330+
if floatTraitPos == -1 || floatTraitPos > end-1 {
336331
p.token.key = TokenInteger
337332
p.token.offset = p.offset + start
338333
} else {
@@ -343,7 +338,7 @@ func (p *parsing) parseNumber() bool {
343338
return true
344339
}
345340

346-
// match compare next bytes from data with `r`
341+
// match compares next bytes from data with `r`
347342
func (p *parsing) match(r []byte, seek bool) bool {
348343
if r[0] == p.curr {
349344
if len(r) > 1 {

stream_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,19 @@ func TestIssues13_SequenceLongerThenStream(t *testing.T) {
243243
require.False(t, ok)
244244
}
245245

246+
func TestIssue11(t *testing.T) {
247+
parser := New()
248+
parser.AllowKeywordSymbols(nil, Numbers)
249+
parser.DefineTokens(1, []string{".."})
250+
251+
stream := parser.ParseString("1..2")
252+
require.Equal(t, "1", string(stream.CurrentToken().Value()))
253+
stream.GoNext()
254+
require.Equal(t, "..", string(stream.CurrentToken().Value()))
255+
stream.GoNext()
256+
require.Equal(t, "2", string(stream.CurrentToken().Value()))
257+
}
258+
246259
var pattern = []byte(`<item count=10 valid id="n9762"> Носки <![CDATA[ socks ]]></item>`)
247260

248261
type dataGenerator struct {

tokenizer.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ func (t *Tokenizer) SetWhiteSpaces(ws []byte) *Tokenizer {
155155
}
156156

157157
// AllowKeywordSymbols sets major and minor symbols for keywords.
158-
// Major symbols (any quantity) might be in begin, in middle and in the end of keyword.
159-
// Minor symbols (any quantity) might be in middle and in the end of the keyword.
158+
// Major symbols (any quantity) may appear at the beginning, in the middle, and at the end of a keyword.
159+
// Minor symbols (any quantity) may appear in the middle and at the end of a keyword.
160160
//
161161
// parser.AllowKeywordSymbols(tokenizer.Underscore, tokenizer.Numbers)
162162
// // allows: "_one23", "__one2__two3"
@@ -200,7 +200,7 @@ func (t *Tokenizer) AllowNumberUnderscore() *Tokenizer {
200200

201201
// DefineTokens adds custom tokens.
202202
// Here `key` is the unique identifier of `tokens`, and `tokens` is a slice of token strings.
203-
// If key already exists tokens will be rewritten.
203+
// If a key already exists, tokens will be rewritten.
204204
func (t *Tokenizer) DefineTokens(key TokenKey, tokens []string) *Tokenizer {
205205
var tks []*tokenRef
206206
if key < 1 {

0 commit comments

Comments
 (0)