Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Pure Go library for parsing Excel 97-2004 (.xls) files in BIFF5/BIFF8 format. Fork of `github.com/extrame/xls`. Does **not** handle .xlsx files.

## Build & Test

No `go.mod` — pre-modules project. Requires `GO111MODULE=on`:

```bash
GO111MODULE=on go test -v -race ./... # run all tests
GO111MODULE=on go test -v -run TestBig # run single test
```

No Makefile, no linter config.

## Architecture

```
WorkBook → WorkSheet → Row → Col (cell)
```

- **Entry points**: `Open()`, `OpenWithCloser()`, `OpenReader()` in `xls.go`
- **WorkBook** (`workbook.go`): parses BIFF record stream, manages sheets/fonts/formats/SST
- **WorkSheet** (`worksheet.go`): lazy-loaded on `GetSheet(i)`, contains rows by index
- **Row** (`row.go`): `Col(i)` (merged-cell aware) vs `ColExact(i)` (exact match)
- **Cell types** (`col.go`): `contentHandler` interface — implementations: `NumberCol`, `LabelsstCol`, `labelCol`, `BlankCol`, `RkCol`, `MulrkCol`, `FormulaCol`, `HyperLink`, etc.
- **Binary records** (`bof.go`): every BIFF record is represented as a `bof` struct (ID uint16 + Size uint16)
- **Date handling** (`date.go`): Excel serial dates → `time.Time`, supports 1900 & 1904 systems
- **Formatting** (`xf.go`, `font.go`, `format.go`): XF records map cells to fonts/formats

Key dependency: `github.com/extrame/ole2` for OLE2 container parsing.

## Parsing Flow

1. `Open()` → OLE2 parse → create WorkBook
2. `Parse()` → read BIFF record stream → extract sheets, SST, XF, fonts, formats
3. `GetSheet(n)` → lazy-parse worksheet records into rows/cells
4. Cell access → format values via XF index lookup
3 changes: 3 additions & 0 deletions bof.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ type bof struct {

//read the utf16 string from reader
func (b *bof) utf16String(buf io.ReadSeeker, count uint32) string {
if count == 0 {
return ""
}
var bts = make([]uint16, count)
binary.Read(buf, binary.LittleEndian, &bts)
runes := utf16.Decode(bts[:len(bts)-1])
Expand Down
8 changes: 4 additions & 4 deletions cell_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ type Ranger interface {
// CellRange holds the row and column bounds of a range as four uint16 values.
// The accessor methods on *CellRange read these fields.
// NOTE(review): FristColB and FirstColB both appear below; this looks like the
// before/after lines of a field rename rendered together — confirm which single
// field the real struct declares.
type CellRange struct {
FirstRowB uint16
LastRowB uint16
FristColB uint16
FirstColB uint16
LastColB uint16
}

Expand All @@ -27,7 +27,7 @@ func (c *CellRange) LastRow() uint16 {
}

// FirstCol returns the first-column bound stored on the range.
// NOTE(review): as written, only the first return executes and the second is
// unreachable; the two lines look like the old and new versions of one
// statement shown together — confirm which field is intended.
func (c *CellRange) FirstCol() uint16 {
return c.FristColB
return c.FirstColB
}

func (c *CellRange) LastCol() uint16 {
Expand All @@ -48,15 +48,15 @@ type HyperLink struct {

//get the hyperlink string, use the public variable Url to get the original Url
func (h *HyperLink) String(wb *WorkBook) []string {
res := make([]string, h.LastColB-h.FristColB+1)
res := make([]string, h.LastColB-h.FirstColB+1)
var str string
if h.IsUrl {
str = fmt.Sprintf("%s(%s)", h.Description, h.Url)
} else {
str = h.ExtendedFilePath
}

for i := uint16(0); i < h.LastColB-h.FristColB+1; i++ {
for i := uint16(0); i < h.LastColB-h.FirstColB+1; i++ {
res[i] = str
}
return res
Expand Down
33 changes: 29 additions & 4 deletions col.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,30 @@ type NumberCol struct {
}

func (c *NumberCol) String(wb *WorkBook) []string {
if fNo := wb.Xfs[c.Index].formatNo(); fNo != 0 {
t := timeFromExcelTime(c.Float, wb.dateMode == 1)
return []string{yymmdd.Format(t, wb.Formats[fNo].str)}
idx := int(c.Index)
if idx < len(wb.Xfs) {
fNo := wb.Xfs[idx].formatNo()
if fNo >= 164 { // user defined format
if formatter := wb.Formats[fNo]; formatter != nil {
formatterLower := strings.ToLower(formatter.str)
if formatterLower == "general" ||
strings.Contains(formatter.str, "#") ||
strings.Contains(formatter.str, ".00") ||
strings.Contains(formatterLower, "m/y") ||
strings.Contains(formatterLower, "d/y") ||
strings.Contains(formatterLower, "m.y") ||
strings.Contains(formatterLower, "d.y") ||
strings.Contains(formatterLower, "h:") ||
strings.Contains(formatterLower, "д.г") {
return []string{strconv.FormatFloat(c.Float, 'f', -1, 64)}
}
t := timeFromExcelTime(c.Float, wb.dateMode == 1)
return []string{yymmdd.Format(t, formatter.str)}
}
} else if 14 <= fNo && fNo <= 17 || fNo == 22 || 27 <= fNo && fNo <= 36 || 50 <= fNo && fNo <= 58 { // built-in date format
t := timeFromExcelTime(c.Float, wb.dateMode == 1)
return []string{t.Format(time.RFC3339)}
}
}
return []string{strconv.FormatFloat(c.Float, 'f', -1, 64)}
}
Expand Down Expand Up @@ -218,7 +239,11 @@ type LabelsstCol struct {
}

// String returns a one-element slice holding the workbook's shared-string
// entry selected by c.Sst.
// NOTE(review): as written, the first return executes unconditionally and
// indexes wb.sst without a bounds check; the guarded lookup below it (which
// falls back to "" when the index is out of range) is unreachable. The two
// versions look like the old and new bodies shown together — confirm which
// one is the real implementation.
func (c *LabelsstCol) String(wb *WorkBook) []string {
return []string{wb.sst[int(c.Sst)]}
idx := int(c.Sst)
if idx < len(wb.sst) {
return []string{wb.sst[idx]}
}
return []string{""}
}

type labelCol struct {
Expand Down
3 changes: 3 additions & 0 deletions comparexlsxlsx.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ func CompareXlsXlsx(xlsfilepathname string, xlsxfilepathname string) string {
}
for row, xlsxRow := range xlsxSheet.Rows {
xlsRow := xlsSheet.Row(row)
if xlsRow == nil {
continue
}
for cell, xlsxCell := range xlsxRow.Cells {
xlsxText := xlsxCell.String()
xlsText := xlsRow.Col(cell)
Expand Down
11 changes: 8 additions & 3 deletions example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ func ExampleOpen() {
}
}

func ExampleWorkBook_NumberSheets() {
if xlFile, err := Open("Table.xls", "utf-8"); err == nil {
func ExampleWorkBook_NumSheets() {
if xlFile, closer, err := OpenWithCloser("Table.xls", "utf-8"); err == nil {
defer closer.Close()
for i := 0; i < xlFile.NumSheets(); i++ {
sheet := xlFile.GetSheet(i)
fmt.Println(sheet.Name)
Expand All @@ -21,13 +22,17 @@ func ExampleWorkBook_NumberSheets() {

//Output: read the content of first two cols in each row
func ExampleWorkBook_GetSheet() {
if xlFile, err := Open("Table.xls", "utf-8"); err == nil {
if xlFile, closer, err := OpenWithCloser("Table.xls", "utf-8"); err == nil {
defer closer.Close()
if sheet1 := xlFile.GetSheet(0); sheet1 != nil {
fmt.Print("Total Lines ", sheet1.MaxRow, sheet1.Name)
col1 := sheet1.Row(0).Col(0)
col2 := sheet1.Row(0).Col(0)
for i := 0; i <= (int(sheet1.MaxRow)); i++ {
row1 := sheet1.Row(i)
if row1 == nil {
continue
}
col1 = row1.Col(0)
col2 = row1.Col(1)
fmt.Print("\n", col1, ",", col2)
Expand Down
16 changes: 13 additions & 3 deletions row.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,19 @@ func (r *Row) Col(i int) string {
serial := uint16(i)
if ch, ok := r.cols[serial]; ok {
strs := ch.String(r.wb)
return strs[0]
if len(strs) > 0 {
return strs[0]
}
return ""
} else {
for _, v := range r.cols {
if v.FirstCol() <= serial && v.LastCol() >= serial {
strs := v.String(r.wb)
return strs[serial-v.FirstCol()]
idx := int(serial - v.FirstCol())
if idx < len(strs) {
return strs[idx]
}
return ""
}
}
}
Expand All @@ -41,7 +48,10 @@ func (r *Row) ColExact(i int) string {
serial := uint16(i)
if ch, ok := r.cols[serial]; ok {
strs := ch.String(r.wb)
return strs[0]
if len(strs) > 0 {
return strs[0]
}
return ""
}
return ""
}
Expand Down
140 changes: 140 additions & 0 deletions sst_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package xls

import (
"bytes"
"encoding/binary"
"testing"
)

// writeBIFFRecord appends one record to buf: a 4-byte little-endian header
// (record id, then payload length as uint16) followed by the payload bytes.
func writeBIFFRecord(buf *bytes.Buffer, id uint16, data []byte) {
	var header [4]byte
	binary.LittleEndian.PutUint16(header[0:2], id)
	binary.LittleEndian.PutUint16(header[2:4], uint16(len(data)))
	buf.Write(header[:])
	buf.Write(data)
}

// makeBOFData returns a 16-byte BOF payload: version 0x0600 (BIFF8) and
// type 0x0005 (workbook globals), both little-endian, followed by 12 zero
// bytes for the remaining fields (Id_make, Year, Flags, Min_ver).
func makeBOFData() []byte {
	payload := make([]byte, 4, 16)
	binary.LittleEndian.PutUint16(payload[0:2], 0x0600) // Ver: BIFF8
	binary.LittleEndian.PutUint16(payload[2:4], 0x0005) // Type: workbook globals
	return append(payload, bytes.Repeat([]byte{0}, 12)...)
}

// TestSSTContinueRichtext tests that richtext formatting runs spanning
// a record boundary don't corrupt subsequent SST entries.
// String 0: "ABC" with 2 formatting runs — chars fit in SST, runs overflow to CONTINUE.
// String 1: "DEF" in CONTINUE after the formatting runs.
func TestSSTContinueRichtext(t *testing.T) {
// SST record: SstInfo + string 0 header + "ABC" (no room for formatting runs)
var sstData bytes.Buffer
binary.Write(&sstData, binary.LittleEndian, uint32(2)) // Total
binary.Write(&sstData, binary.LittleEndian, uint32(2)) // Count
binary.Write(&sstData, binary.LittleEndian, uint16(3)) // string 0 char count
sstData.WriteByte(0x08) // flag: has richtext
binary.Write(&sstData, binary.LittleEndian, uint16(2)) // richtext_num = 2 runs
sstData.Write([]byte("ABC")) // char data (Latin1)
// 0 bytes left for 8-byte formatting runs → overflow to CONTINUE

// CONTINUE record: formatting runs + string 1
var contData bytes.Buffer
contData.Write(make([]byte, 8)) // 2 runs × 4 bytes (dummy)
binary.Write(&contData, binary.LittleEndian, uint16(3)) // string 1 char count
contData.WriteByte(0x00) // flag: plain Latin1
contData.Write([]byte("DEF"))

var stream bytes.Buffer
writeBIFFRecord(&stream, 0x0809, makeBOFData())
writeBIFFRecord(&stream, 0x00FC, sstData.Bytes())
writeBIFFRecord(&stream, 0x003C, contData.Bytes())

wb := &WorkBook{Formats: make(map[uint16]*Format)}
wb.Parse(bytes.NewReader(stream.Bytes()))

if len(wb.sst) != 2 {
t.Fatalf("expected 2 SST entries, got %d", len(wb.sst))
}
if wb.sst[0] != "ABC" {
t.Errorf("sst[0]: expected %q, got %q", "ABC", wb.sst[0])
}
if wb.sst[1] != "DEF" {
t.Errorf("sst[1]: expected %q, got %q", "DEF", wb.sst[1])
}
}

// TestSSTContinueRichtextPartial tests the case where richtext formatting
// runs are partially read in the SST record (boundary falls mid-formatting).
func TestSSTContinueRichtextPartial(t *testing.T) {
// SST record: SstInfo + string 0 header + "ABC" + 4 of 8 formatting bytes
var sstData bytes.Buffer
binary.Write(&sstData, binary.LittleEndian, uint32(2)) // Total
binary.Write(&sstData, binary.LittleEndian, uint32(2)) // Count
binary.Write(&sstData, binary.LittleEndian, uint16(3)) // string 0 char count
sstData.WriteByte(0x08) // flag: has richtext
binary.Write(&sstData, binary.LittleEndian, uint16(2)) // richtext_num = 2
sstData.Write([]byte("ABC")) // chars
sstData.Write(make([]byte, 4)) // partial: 4 of 8 formatting bytes

// CONTINUE: remaining 4 formatting bytes + string 1
var contData bytes.Buffer
contData.Write(make([]byte, 4)) // remaining formatting bytes
binary.Write(&contData, binary.LittleEndian, uint16(3)) // string 1 char count
contData.WriteByte(0x00) // flag
contData.Write([]byte("DEF"))

var stream bytes.Buffer
writeBIFFRecord(&stream, 0x0809, makeBOFData())
writeBIFFRecord(&stream, 0x00FC, sstData.Bytes())
writeBIFFRecord(&stream, 0x003C, contData.Bytes())

wb := &WorkBook{Formats: make(map[uint16]*Format)}
wb.Parse(bytes.NewReader(stream.Bytes()))

if len(wb.sst) != 2 {
t.Fatalf("expected 2 SST entries, got %d", len(wb.sst))
}
if wb.sst[0] != "ABC" {
t.Errorf("sst[0]: expected %q, got %q", "ABC", wb.sst[0])
}
if wb.sst[1] != "DEF" {
t.Errorf("sst[1]: expected %q, got %q", "DEF", wb.sst[1])
}
}

// TestSSTContinuePhonetic tests that phonetic data spanning a record
// boundary is properly skipped in the CONTINUE handler.
func TestSSTContinuePhonetic(t *testing.T) {
// SST record: SstInfo + string 0 with phonetic flag, chars fit but phonetic overflows
var sstData bytes.Buffer
binary.Write(&sstData, binary.LittleEndian, uint32(2)) // Total
binary.Write(&sstData, binary.LittleEndian, uint32(2)) // Count
binary.Write(&sstData, binary.LittleEndian, uint16(3)) // string 0 char count
sstData.WriteByte(0x04) // flag: has phonetic
binary.Write(&sstData, binary.LittleEndian, uint32(12)) // phonetic_size = 12 bytes
sstData.Write([]byte("ABC")) // chars
// 0 bytes for 12-byte phonetic data → overflow

// CONTINUE: phonetic data + string 1
var contData bytes.Buffer
contData.Write(make([]byte, 12)) // phonetic data (dummy)
binary.Write(&contData, binary.LittleEndian, uint16(3)) // string 1 char count
contData.WriteByte(0x00) // flag
contData.Write([]byte("DEF"))

var stream bytes.Buffer
writeBIFFRecord(&stream, 0x0809, makeBOFData())
writeBIFFRecord(&stream, 0x00FC, sstData.Bytes())
writeBIFFRecord(&stream, 0x003C, contData.Bytes())

wb := &WorkBook{Formats: make(map[uint16]*Format)}
wb.Parse(bytes.NewReader(stream.Bytes()))

if len(wb.sst) != 2 {
t.Fatalf("expected 2 SST entries, got %d", len(wb.sst))
}
if wb.sst[0] != "ABC" {
t.Errorf("sst[0]: expected %q, got %q", "ABC", wb.sst[0])
}
if wb.sst[1] != "DEF" {
t.Errorf("sst[1]: expected %q, got %q", "DEF", wb.sst[1])
}
}
Loading