Removed minimumConfidence from Model

andreekeberg · andreekeberg · commit d82c2e4e0c48 · 2020-08-28T02:56:24.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,8 +2,15 @@
 
 All notable changes to this project will be documented in this file.
 
+## [2.0.0] - 2020-08-28
+
+### Breaking changes
+
+* Removed `minimumConfidence` from `Model`; pass it as the third argument to `Classifier.predict()` instead (defaults to `0.2`)
+
 ## [1.0.0] - 2020-08-26
 
 Initial release
 
+[2.0.0]: https://github.com/andreekeberg/ml-classify-text-js/releases/tag/2.0.0
 [1.0.0]: https://github.com/andreekeberg/ml-classify-text-js/releases/tag/1.0.0
diff --git a/README.md b/README.md
@@ -93,7 +93,6 @@ The following configuration options can be passed both directly to a new [Model]
 | --- | --- | --- | --- |
 | **nGramMin** | `int` | `1` | Minimum n-gram size |
 | **nGramMax** | `int` | `1` | Maximum n-gram size |
-| **minimumConfidence** | `int` \| `float` | `0.2` | Minimum confidence required for predictions |
 | **vocabulary** | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data, set to `false` to store terms directly in the data entries |
 | **data** | `Object` | `{}` | Key-value store of labels and training data vectors |
 
@@ -147,7 +146,6 @@ Returning:
 {
     nGramMin: 1,
     nGramMax: 1,
-    minimumConfidence: 0.2,
     vocabulary: [
     	'this',    'is',      'great',
     	'so',      'cool',    'wow',
diff --git a/docs/classifier.md b/docs/classifier.md
@@ -21,7 +21,6 @@
 | [model] | `Model` \| `Object` |  |  |
 | [model.nGramMin] | `int` | `1` | Minimum n-gram size |
 | [model.nGramMax] | `int` | `1` | Maximum n-gram size |
-| [model.minimumConfidence] | `int` \| `float` | `0.2` | Minimum confidence required for predictions |
 | [model.vocabulary] | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data, set to `false` to store terms directly in the data entries |
 | [model.data] | `Object` | `{}` | Key-value store of labels and training data vectors |
 
@@ -49,7 +48,7 @@ Return an array of one or more Prediction instances
 | --- | --- | --- | --- |
 | input | `string` |  | Input string to make a prediction from |
 | [maxMatches] | `int` | `1` | Maximum number of predictions to return |
-| [minimumConfidence] | `float` | `null` | Minimum confidence required to include a prediction |
+| [minimumConfidence] | `float` | `0.2` | Minimum confidence required to include a prediction |
 
 <a name="Classifier+splitWords"></a>
 
diff --git a/docs/model.md b/docs/model.md
@@ -6,7 +6,6 @@
     * [new Model([config])](#new_Model_new)
     * [.nGramMin](#Model+nGramMin) : `int`
     * [.nGramMax](#Model+nGramMax) : `int`
-    * [.minimumConfidence](#Model+minimumConfidence) : `float`
     * [.vocabulary](#Model+vocabulary) : `Vocabulary` \| `false`
     * [.data](#Model+data) : `Object`
     * [.serialize()](#Model+serialize) ⇒ `Object`
@@ -20,7 +19,6 @@
 | [config] | `Object` |  |  |
 | [config.nGramMin] | `int` | `1` | Minimum n-gram size |
 | [config.nGramMax] | `int` | `1` | Maximum n-gram size |
-| [config.minimumConfidence] | `int` \| `float` | `0.2` | Minimum confidence required for predictions |
 | [config.vocabulary] | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries |
 | [config.data] | `Object` | `{}` | Key-value store containing all training data |
 
@@ -34,11 +32,6 @@ Minimum n-gram size
 ### model.nGramMax : `int`
 Maximum n-gram size
 
-<a name="Model+minimumConfidence"></a>
-
-### model.minimumConfidence : `float`
-Minimum confidence required for predictions
-
 <a name="Model+vocabulary"></a>
 
 ### model.vocabulary : `Vocabulary` \| `false`
@@ -52,7 +45,6 @@ Model data
 <a name="Model+serialize"></a>
 
 ### model.serialize() ⇒ `Object`
-Return the model in its current state for storing, including the configured
-n-gram min/max values, the minimum confidence required for for predictions,
-the vocabulary as an array (if any, otherwise false), and an object literal
-with all the training data
+Return the model in its current state as an object literal, including the
+configured n-gram min/max values, the vocabulary as an array (if any,
+otherwise false), and an object literal with all the training data
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "ml-classify-text",
-  "version": "1.0.0",
+  "version": "2.0.0",
   "description": "Text classification using n-grams and cosine similarity",
   "module": "./lib",
   "main": "./lib",
diff --git a/src/classifier.js b/src/classifier.js
@@ -6,7 +6,6 @@ import Prediction from './prediction'
  * @param {(Model|Object)} [model]
  * @param {int} [model.nGramMin=1] - Minimum n-gram size
  * @param {int} [model.nGramMax=1] - Maximum n-gram size
- * @param {(int|float)} [model.minimumConfidence=0.2] - Minimum confidence required for predictions
  * @param {(Array|Set|false)} [model.vocabulary=[]] - Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries
  * @param {Object} [model.data={}] - Key-value store containing all training data
  * @constructor
@@ -94,24 +93,32 @@ class Classifier {
      *
      * @param {string} input - Input string to make a prediction from
      * @param {int} [maxMatches=1] Maximum number of predictions to return
-     * @param {float} [minimumConfidence=null] Minimum confidence required to include a prediction
+     * @param {float} [minimumConfidence=0.2] Minimum confidence required to include a prediction
      * @return {Array}
      */
-    predict(input, maxMatches = 1, minimumConfidence = null) {
+    predict(input, maxMatches = 1, minimumConfidence = 0.2) {
         if (typeof input !== 'string') {
             throw new Error('input must be a string')
         }
 
+        if (typeof minimumConfidence !== 'number') {
+            throw new Error('minimumConfidence must be a number')
+        }
+
+        if (minimumConfidence < 0) {
+            throw new Error('minimumConfidence can not be lower than 0')
+        }
+
+        if (minimumConfidence > 1) {
+            throw new Error('minimumConfidence can not be higher than 1')
+        }
+
         let tokens = this.tokenize(input)
 
         if (this.vocabulary !== false) {
             tokens = this.vectorize(tokens)
         }
 
-        if (minimumConfidence === null) {
-            minimumConfidence = this.model.minimumConfidence
-        }
-
         let predictions = []
 
         Object.keys(this._model.data).forEach(label => {
diff --git a/src/model.js b/src/model.js
@@ -4,7 +4,6 @@ import Vocabulary from './vocabulary'
  * @param {Object} [config]
  * @param {int} [config.nGramMin=1] - Minimum n-gram size
  * @param {int} [config.nGramMax=1] - Maximum n-gram size
- * @param {(int|float)} [config.minimumConfidence=0.2] - Minimum confidence required for predictions
  * @param {(Array|Set|false)} [config.vocabulary=[]] - Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries
  * @param {Object} [config.data={}] - Key-value store containing all training data
  * @constructor
@@ -18,7 +17,6 @@ class Model {
         config = {
             nGramMin: 1,
             nGramMax: 1,
-            minimumConfidence: 0.2,
             vocabulary: [],
             data: {},
             ...config
@@ -40,18 +38,6 @@ class Model {
             throw new Error('Config value nGramMax must be at least 1')
         }
 
-        if (typeof config.minimumConfidence !== 'number') {
-            throw new Error('Config value minimumConfidence must be a number')
-        }
-
-        if (config.minimumConfidence < 0) {
-            throw new Error('Config value minimumConfidence can not be lower than 0')
-        }
-
-        if (config.minimumConfidence > 1) {
-            throw new Error('Config value minimumConfidence can not be higher than 1')
-        }
-
         if (config.nGramMax < config.nGramMin) {
             throw new Error('Invalid nGramMin/nGramMax combination in config')
         }
@@ -66,7 +52,6 @@ class Model {
 
         this._nGramMin = config.nGramMin
         this._nGramMax = config.nGramMax
-        this._minimumConfidence = config.minimumConfidence
         this._vocabulary = config.vocabulary
         this._data = {...config.data}
     }
@@ -105,31 +90,6 @@ class Model {
         this._nGramMax = size
     }
 
-    /**
-     * Minimum confidence required for predictions
-     *
-     * @type {float}
-     */
-    get minimumConfidence() {
-        return this._minimumConfidence
-    }
-
-    set minimumConfidence(confidence) {
-        if (typeof confidence !== 'number') {
-            throw new Error('minimumConfidence must be a number')
-        }
-
-        if (confidence < 0) {
-            throw new Error('minimumConfidence can not be lower than 0')
-        }
-
-        if (confidence > 1) {
-            throw new Error('minimumConfidence can not be higher than 1')
-        }
-
-        this._minimumConfidence = confidence
-    }
-
     /**
      * Vocabulary instance 
      *
@@ -165,18 +125,16 @@ class Model {
     }
 
     /**
-     * Return the model in its current state for storing, including the configured
-     * n-gram min/max values, the minimum confidence required for for predictions,
-     * the vocabulary as an array (if any, otherwise false),and an object literal
-     * with all the training data
+     * Return the model in its current state as an object literal, including the
+     * configured n-gram min/max values, the vocabulary as an array (if any,
+     * otherwise false), and an object literal with all the training data
      *
      * @return {Object}
      */
     serialize() {
         return {
             nGramMin: this._nGramMin,
             nGramMax: this._nGramMax,
-            minimumConfidence: this._minimumConfidence,
             vocabulary: Array.from(this._vocabulary.terms),
             data: this._data
         }
diff --git a/test/classifier.js b/test/classifier.js
@@ -328,6 +328,24 @@ describe('Classifier', () => {
             expect(() => classifier.predict([])).to.throw(Error)
         })
 
+        it('should throw an error if minimumConfidence is not a number', () => {
+            const classifier = new Classifier()
+
+            expect(() => classifier.predict('', null, '')).to.throw(Error)
+        })
+
+        it('should throw an error if minimumConfidence is lower than 0', () => {
+            const classifier = new Classifier()
+
+            expect(() => classifier.predict('', null, -1)).to.throw(Error)
+        })
+
+        it('should throw an error if minimumConfidence is higher than 1', () => {
+            const classifier = new Classifier()
+
+            expect(() => classifier.predict('', null, 2)).to.throw(Error)
+        })
+
         it('should return an array', () => {
             const classifier = new Classifier()
 
diff --git a/test/model.js b/test/model.js
@@ -39,24 +39,6 @@ describe('Model', () => {
             })).to.throw(Error)
         })
 
-        it('should throw an error if minimumConfidence is not a number', () => {
-            expect(() => new Model({
-                minimumConfidence: 'test'
-            })).to.throw(Error)
-        })
-
-        it('should throw an error if minimumConfidence is lower than 0', () => {
-            expect(() => new Model({
-                minimumConfidence: -1
-            })).to.throw(Error)
-        })
-
-        it('should throw an error if minimumConfidence is higher than 1', () => {
-            expect(() => new Model({
-                minimumConfidence: 2
-            })).to.throw(Error)
-        })
-
         it('should throw an error if data is not an object literal', () => {
             expect(() => new Model({
                 data: []
@@ -129,54 +111,6 @@ describe('Model', () => {
         })
     })
 
-    describe('minimumConfidence', () => {
-        it('should return a number', () => {
-            const model = new Model()
-
-            expect(model.minimumConfidence).to.be.a('number')
-        })
-
-        it('should return the current minimumConfidence value', () => {
-            const model = new Model({
-                minimumConfidence: 0.5
-            })
-
-            expect(model.minimumConfidence).to.equal(0.5)
-        })
-
-        it('should set the minimumConfidence value', () => {
-            const model = new Model()
-
-            model.minimumConfidence = 0.1
-
-            expect(model.minimumConfidence).to.equal(0.1)
-        })
-
-        it('should throw an error if confidence is not a number', () => {
-            const model = new Model()
-
-            expect(() => {
-                model.minimumConfidence = 'test'
-            }).to.throw(Error)
-        })
-
-        it('should throw an error if confidence is lower than 0', () => {
-            const model = new Model()
-
-            expect(() => {
-                model.minimumConfidence = -1
-            }).to.throw(Error)
-        })
-
-        it('should throw an error if confidence is higher than 1', () => {
-            const model = new Model()
-
-            expect(() => {
-                model.minimumConfidence = 2
-            }).to.throw(Error)
-        })
-    })
-
     describe('vocabulary', () => {
         it('should return a vocabulary instance', () => {
             const model = new Model()
@@ -244,7 +178,6 @@ describe('Model', () => {
             expect(model.serialize()).to.eql({
                 nGramMin: 1,
                 nGramMax: 1,
-                minimumConfidence: 0.2,
                 vocabulary: [],
                 data: {}
             })

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "ml-classify-text",`
`3`		`- "version": "1.0.0",`
	`3`	`+ "version": "2.0.0",`
`4`	`4`	`"description": "Text classification using n-grams and cosine similarity",`
`5`	`5`	`"module": "./lib",`
`6`	`6`	`"main": "./lib",`