Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ Versions currently being supported with security updates.

| Version | Supported | EOL |
| ------- | ------------------ | ------------- |
| v1.2.x | :white_check_mark: | Jun 1st 2022 |
| v2.x | :white_check_mark: | Not defined |
| v1.2.x | :x: | Jun 1st 2022 |
| v2.x | :white_check_mark: | TBD |
| v3.x | :white_check_mark: | Not defined |

## Reporting a Vulnerability

Expand Down
14 changes: 14 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@ module github.com/corazawaf/coraza/v3

go 1.18

// Testing dependencies:
// - go-mockdns
// - go-modsecurity (optional)

// Development dependencies:
// - mage

// Build dependencies:
// - libinjection-go
// - aho-corasick

// Tinygo dependencies:
// - gjson

require (
github.com/anuraaga/go-modsecurity v0.0.0-20220824035035-b9a4099778df
github.com/corazawaf/libinjection-go v0.0.0-20220207031228-44e9c4250eb5
Expand Down
53 changes: 53 additions & 0 deletions operators/pm_from_dataset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

package operators

import (
"fmt"

"github.com/corazawaf/coraza/v3"
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
)

// TODO according to coraza researchs, re2 matching is faster than ahocorasick
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we link the benchmarks or show results?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should remove this comment, it comes from v1

// maybe we should switch in the future
// pmFromDataset is always lowercase
type pmFromDataset struct {
matcher ahocorasick.AhoCorasick
}

func (o *pmFromDataset) Init(options coraza.RuleOperatorOptions) error {
data := options.Arguments
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this going to be single value always?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could accept multiple datasets if required, but I don't see much value

dataset, ok := options.Datasets[data]
if !ok {
return fmt.Errorf("Dataset %q not found", data)
}
builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{
AsciiCaseInsensitive: true,
MatchOnlyWholeWords: false,
MatchKind: ahocorasick.LeftMostLongestMatch,
DFA: true,
})

// TODO this operator is supposed to support snort data syntax: "@pmFromDataset A|42|C|44|F"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think this TODO is misleading, snort syntax would end up being inside the Dataset, wouldn't it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, we should keep it only for PM.

Regarding duplications, it depends on the operator, the dictionary will store everything case-sensitive. The aho-corasick implementation will read it case-insensitive and might probably remove duplicates.

Still, a duplicate shouldn't affect results, just performance.

o.matcher = builder.Build(dataset)
return nil
}

func (o *pmFromDataset) Evaluate(tx *coraza.Transaction, value string) bool {
if tx.Capture {
matches := o.matcher.FindAll(value)
for i, match := range matches {
if i == 10 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This value seems arbitrary (and kind of repetitive). Maybe we want to abstract it, at least in this package?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, it's part of our technical debt, we should add the constant somewhere. 10 is the documented standard though.

return true
}
tx.CaptureField(i, value[match.Start():match.End()])
}
return len(matches) > 0
}
iter := o.matcher.Iter(value)
return iter.Next() != nil
}

var _ coraza.RuleOperator = (*pmFromDataset)(nil)
37 changes: 37 additions & 0 deletions operators/pm_from_dataset_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

package operators

import (
"context"
"fmt"
"testing"

"github.com/corazawaf/coraza/v3"
)

func TestPmFromDataset(t *testing.T) {
pm := &pmFromDataset{}
opts := coraza.RuleOperatorOptions{
Arguments: "test_1",
Datasets: map[string][]string{
"test_1": {"test_1", "test_2"},
},
}

if err := pm.Init(opts); err != nil {
t.Error(err)
}
waf := coraza.NewWaf()
tx := waf.NewTransaction(context.Background())
tx.Capture = true
res := pm.Evaluate(tx, "test_1")
if !res {
t.Error("pmFromDataset failed")
}
opts.Datasets = map[string][]string{}
if err := pm.Init(opts); err == nil {
t.Error(fmt.Errorf("pmFromDataset should have failed"))
}
}
3 changes: 3 additions & 0 deletions rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ type RuleOperatorOptions struct {

// Path is used to store a list of possible data paths
Path []string

// Datasets contains input datasets or dictionaries
Datasets map[string][]string
}

// RuleOperator interface is used to define rule @operators
Expand Down
32 changes: 28 additions & 4 deletions seclang/directives.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ import (

// DirectiveOptions contains the parsed options for a directive
type DirectiveOptions struct {
Waf *coraza.Waf
Config types.Config
Opts string
Path []string
Waf *coraza.Waf
Config types.Config
Opts string
Path []string
Datasets map[string][]string
}

type directive = func(options *DirectiveOptions) error
Expand Down Expand Up @@ -470,6 +471,28 @@ func directiveSecIgnoreRuleCompilationErrors(options *DirectiveOptions) error {
return nil
}

func directiveSecDataset(options *DirectiveOptions) error {
spl := strings.SplitN(options.Opts, " ", 2)
if len(spl) != 2 {
return errors.New("syntax error: SecDataset name `\n...\n`")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error is a bit unclear to me. What does \n...\n mean?

Copy link
Member Author

@jptosso jptosso Aug 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will be printed as:

ERROR: Directive SecDataset - syntax error: SecDataset name `
...
`

It is hard to represent the syntax as a single line.

}
name := spl[0]
if _, ok := options.Datasets[name]; ok {
options.Waf.Logger.Warn("Dataset %s already exists, overwriting", name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather use %q, it is handy when debugging and an empty space is the cause of the problem.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, we should normalize using %q for external inputs. We should document it as technical debt

}
arr := []string{}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this good enough @anuraaga? Is it worth to work out as a best effort a capacity or since this only happens on bootstrap it is OK to use with no length.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should care much about optimizations for the seclang package and it is already way faster than modsecurity and it doesn't happen that often.

Still, we should track a seclang optimization in the future.

data := strings.Trim(spl[1], "`")
for _, s := range strings.Split(data, "\n") {
s = strings.TrimSpace(s)
if s == "" || s[0] == '#' {
continue
}
arr = append(arr, s)
}
options.Datasets[name] = arr
return nil
}

func newCompileRuleError(err error, opts string) error {
return fmt.Errorf("failed to compile rule (%s): %s", err, opts)
}
Expand Down Expand Up @@ -573,6 +596,7 @@ var directivesMap = map[string]directive{
"secauditlogfilemode": directiveSecAuditLogFileMode,
"secauditlogdirmode": directiveSecAuditLogDirMode,
"secignorerulecompilationerrors": directiveSecIgnoreRuleCompilationErrors,
"secdataset": directiveSecDataset,

// Unsupported Directives
"secargumentseparator": directiveUnsupported,
Expand Down
16 changes: 16 additions & 0 deletions seclang/directives_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,19 @@ func TestInvalidRulesWithIgnoredErrors(t *testing.T) {
t.Error("failed to error on invalid rule")
}
}

func TestSecDataset(t *testing.T) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to include more tests here. I guess the char "`" isn't supported in the dataset.

Copy link
Member Author

@jptosso jptosso Aug 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"`" is supported in the dataset, the code only interprets it if it is the last character of the directive declaration or the only character in the line.

waf := coraza.NewWaf()
p, _ := NewParser(waf)
if err := p.FromString("" +
"SecDataset test `\n123\n456\n`\n"); err != nil {
t.Error(err)
}
ds := p.options.Datasets["test"]
if len(ds) != 2 {
t.Errorf("failed to add dataset, got %d records", len(ds))
}
if ds[0] != "123" || ds[1] != "456" {
t.Error("failed to add dataset")
}
}
25 changes: 18 additions & 7 deletions seclang/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,29 @@ func (p *Parser) FromString(data string) error {
scanner := bufio.NewScanner(strings.NewReader(data))
var linebuffer = ""
pattern := regexp.MustCompile(`\\(\s+)?$`)
inQuotes := false
for scanner.Scan() {
p.currentLine++
line := scanner.Text()
linebuffer += strings.TrimSpace(line)
line := strings.TrimSpace(scanner.Text())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change requires a unit test.

Copy link
Member Author

@jptosso jptosso Aug 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CRS tests are passing, but we should definitely add more tests for this. It is a major change for the seclang parser.

if !inQuotes && len(line) > 0 && line[len(line)-1] == '`' {
inQuotes = true
} else if inQuotes && len(line) > 0 && line[0] == '`' {
inQuotes = false
}
if inQuotes {
linebuffer += line + "\n"
} else {
linebuffer += line
}

// Check if line ends with \
match := pattern.MatchString(line)
if !match {
if !pattern.MatchString(line) && !inQuotes {
err := p.evaluate(linebuffer)
if err != nil {
return err
}
linebuffer = ""
} else {
} else if !inQuotes {
linebuffer = strings.TrimSuffix(linebuffer, "\\")
}
}
Expand Down Expand Up @@ -158,8 +168,9 @@ func NewParser(waf *coraza.Waf) (*Parser, error) {
}
p := &Parser{
options: &DirectiveOptions{
Waf: waf,
Config: make(types.Config),
Waf: waf,
Config: make(types.Config),
Datasets: make(map[string][]string),
},
}
return p, nil
Expand Down
2 changes: 1 addition & 1 deletion sonar-project.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ sonar.organization=jptosso

# This is the name and version displayed in the SonarCloud UI.
sonar.projectName=coraza-waf
sonar.projectVersion=v2.0.0-dev
sonar.projectVersion=v3.0.0-dev

# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.
#sonar.sources=.
Expand Down