
Commit 49f5fad

Update docs
1 parent d9d0119 · commit 49f5fad

7 files changed (112 additions, 72 deletions)


README.md

Lines changed: 8 additions & 8 deletions
@@ -89,13 +89,13 @@ The following configuration options can be passed both directly to a new [Model]
 
 #### Options
 
-| Property | Description | Default |
-| --- | --- | --- |
-| **nGramMin** | Minimum n-gram size | `1` |
-| **nGramMax** | Maximum n-gram size | `1` |
-| **minimumConfidence** | Minimum confidence required for predictions | `0.2` |
-| **vocabulary** | Terms mapped to indexes in the model data entries, set to `false` to store terms directly in the data entries | `[]` |
-| **data** | Object literal containing all training data | `{}` |
+| Property | Type | Default | Description |
+| --- | --- | --- | --- |
+| **nGramMin** | `int` | `1` | Minimum n-gram size |
+| **nGramMax** | `int` | `1` | Maximum n-gram size |
+| **minimumConfidence** | `int` \| `float` | `0.2` | Minimum confidence required for predictions |
+| **vocabulary** | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data, set to `false` to store terms directly in the data entries |
+| **data** | `Object` | `{}` | Key-value store of labels and training data vectors |
 
 ### Using n-grams
 
@@ -113,7 +113,7 @@ const classifier = new Classifier({
   nGramMax: 2
 })
 
-let tokens = tokenize('I really dont like it')
+let tokens = classifier.tokenize('I really dont like it')
 
 console.log(tokens)
 ```
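For reference, the corrected n-gram example can be run roughly as follows. This is a minimal sketch: it assumes `Classifier` is the default export of `src/classifier.js`, and the exact token counts shown in the comment depend on the tokenizer itself.

```js
import Classifier from './src/classifier'

// nGramMax: 2 makes tokenize() count bigrams as well as single words
const classifier = new Classifier({
  nGramMin: 1,
  nGramMax: 2
})

let tokens = classifier.tokenize('I really dont like it')

// Expected shape: unique n-grams mapped to their occurrence counts, e.g.
// { i: 1, really: 1, dont: 1, like: 1, it: 1, 'i really': 1, 'really dont': 1, ... }
console.log(tokens)
```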

docs/classifier.md

Lines changed: 58 additions & 14 deletions
@@ -7,42 +7,86 @@
 * [.model](#Classifier+model) : <code>Model</code>
 * [.train([input], label)](#Classifier+train) ⇒ <code>this</code>
 * [.predict(input, [maxMatches], [minimumConfidence])](#Classifier+predict) ⇒ <code>Array</code>
+* [.splitWords(input)](#Classifier+splitWords) ⇒ <code>Array</code>
+* [.tokenize(input)](#Classifier+tokenize) ⇒ <code>Object</code>
+* [.vectorize(tokens)](#Classifier+vectorize) ⇒ <code>Object</code>
+* [.cosineSimilarity(v1, v2)](#Classifier+cosineSimilarity) ⇒ <code>float</code>
 
 <a name="new_Classifier_new"></a>
 
 ### new Classifier([model])
 
 | Param | Type | Default | Description |
 | --- | --- | --- | --- |
-| [model] | <code>Model</code> \| <code>Object</code> | | |
-| [model.nGramMin] | <code>int</code> | <code>1</code> | Minimum n-gram size |
-| [model.nGramMax] | <code>int</code> | <code>1</code> | Maximum n-gram size |
-| [model.minimumConfidence] | <code>int</code> \| <code>float</code> | <code>0.2</code> | Minimum confidence required for predictions |
-| [model.vocabulary] | <code>Array</code> \| <code>Set</code> \| <code>false</code> | <code>[]</code> | Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries |
-| [model.data] | <code>int</code> | <code>{}</code> | Key-value store containing all training data |
+| [model] | `Model` \| `Object` | | |
+| [model.nGramMin] | `int` | `1` | Minimum n-gram size |
+| [model.nGramMax] | `int` | `1` | Maximum n-gram size |
+| [model.minimumConfidence] | `int` \| `float` | `0.2` | Minimum confidence required for predictions |
+| [model.vocabulary] | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data, set to `false` to store terms directly in the data entries |
+| [model.data] | `Object` | `{}` | Key-value store of labels and training data vectors |
 
 <a name="Classifier+model"></a>
 
-### classifier.model : <code>Model</code>
+### classifier.model : `Model`
 Model instance
 
 <a name="Classifier+train"></a>
 
-### classifier.train([input], label) ⇒ <code>this</code>
+### classifier.train([input], label) ⇒ `this`
 Train the current model using an input string (or array of strings) and a corresponding label
 
 | Param | Type | Description |
 | --- | --- | --- |
-| [input] | <code>string</code> \| <code>Array.&lt;string&gt;</code> | String, or an array of strings |
-| label | <code>string</code> | Corresponding label |
+| input | `string` \| `Array` | String, or an array of strings |
+| label | `string` | Corresponding label |
 
 <a name="Classifier+predict"></a>
 
-### classifier.predict(input, [maxMatches], [minimumConfidence]) ⇒ <code>Array</code>
+### classifier.predict(input, [maxMatches], [minimumConfidence]) ⇒ `Array`
 Return an array of one or more Prediction instances
 
 | Param | Type | Default | Description |
 | --- | --- | --- | --- |
-| input | <code>string</code> | | Input string to make a prediction from |
-| [maxMatches] | <code>int</code> | <code>1</code> | Maximum number of predictions to return |
-| [minimumConfidence] | <code>float</code> | <code>null</code> | Minimum confidence required to include a prediction |
+| input | `string` | | Input string to make a prediction from |
+| [maxMatches] | `int` | `1` | Maximum number of predictions to return |
+| [minimumConfidence] | `float` | `null` | Minimum confidence required to include a prediction |
+
+<a name="Classifier+splitWords"></a>
+
+### classifier.splitWords(input) ⇒ `Array`
+Split a string into an array of lowercase words, with all non-letter characters removed
+
+| Param | Type |
+| --- | --- |
+| input | `string` |
+
+<a name="Classifier+tokenize"></a>
+
+### classifier.tokenize(input) ⇒ `Object`
+Create an object literal of unique tokens (n-grams) as keys, and their
+respective occurrences as values based on an input string, or array of words
+
+| Param | Type |
+| --- | --- |
+| input | `string` \| `Array` |
+
+<a name="Classifier+vectorize"></a>
+
+### classifier.vectorize(tokens) ⇒ `Object`
+Convert a tokenized object into a new object with all keys (terms)
+translated to their index in the vocabulary (adding all terms to
+the vocabulary that do not already exist)
+
+| Param | Type |
+| --- | --- |
+| tokens | `Object` |
+
+<a name="Classifier+cosineSimilarity"></a>
+
+### classifier.cosineSimilarity(v1, v2) ⇒ `float`
+Return the cosine similarity between two vectors
+
+| Param | Type |
+| --- | --- |
+| v1 | `Object` |
+| v2 | `Object` |
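As a quick orientation to the methods documented here, the following sketch runs the full train/predict cycle. The labels and sample phrases are invented for illustration, and `Classifier` is assumed to be the default export of `src/classifier.js`.

```js
import Classifier from './src/classifier'

const classifier = new Classifier()

// train() accepts a string or an array of strings plus a label
classifier.train(['Great service, really friendly staff'], 'positive')
classifier.train(['Terrible experience, would not recommend'], 'negative')

// predict() returns an array of Prediction instances
const predictions = classifier.predict('Really great experience', 1)

predictions.forEach((prediction) => {
  // Each Prediction exposes a label and a confidence (see docs/prediction.md)
  console.log(prediction.label, prediction.confidence)
})

// The lower-level steps are also public now: splitWords -> tokenize -> vectorize
const vector = classifier.vectorize(classifier.tokenize('great service'))
```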

docs/model.md

Lines changed: 18 additions & 18 deletions
@@ -4,54 +4,54 @@
 
 * [Model](#Model)
 * [new Model([config])](#new_Model_new)
-* [.nGramMin](#Model+nGramMin) : <code>number</code>
-* [.nGramMax](#Model+nGramMax) : <code>number</code>
-* [.minimumConfidence](#Model+minimumConfidence) : <code>number</code>
-* [.vocabulary](#Model+vocabulary) : <code>Vocabulary</code> \| <code>false</code>
-* [.data](#Model+data) : <code>Object</code>
-* [.serialize()](#Model+serialize) ⇒ <code>Object</code>
+* [.nGramMin](#Model+nGramMin) : `int`
+* [.nGramMax](#Model+nGramMax) : `int`
+* [.minimumConfidence](#Model+minimumConfidence) : `float`
+* [.vocabulary](#Model+vocabulary) : `Vocabulary` \| `false`
+* [.data](#Model+data) : `Object`
+* [.serialize()](#Model+serialize) ⇒ `Object`
 
 <a name="new_Model_new"></a>
 
 ### new Model([config])
 
 | Param | Type | Default | Description |
 | --- | --- | --- | --- |
-| [config] | <code>Object</code> | | |
-| [config.nGramMin] | <code>int</code> | <code>1</code> | Minimum n-gram size |
-| [config.nGramMax] | <code>int</code> | <code>1</code> | Maximum n-gram size |
-| [config.minimumConfidence] | <code>int</code> \| <code>float</code> | <code>0.2</code> | Minimum confidence required for predictions |
-| [config.vocabulary] | <code>Array</code> \| <code>Set</code> \| <code>false</code> | <code>[]</code> | Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries |
-| [config.data] | <code>Object</code> | <code>{}</code> | Key-value store containing all training data |
+| [config] | `Object` | | |
+| [config.nGramMin] | `int` | `1` | Minimum n-gram size |
+| [config.nGramMax] | `int` | `1` | Maximum n-gram size |
+| [config.minimumConfidence] | `int` \| `float` | `0.2` | Minimum confidence required for predictions |
+| [config.vocabulary] | `Array` \| `Set` \| `false` | `[]` | Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries |
+| [config.data] | `Object` | `{}` | Key-value store containing all training data |
 
 <a name="Model+nGramMin"></a>
 
-### model.nGramMin : <code>number</code>
+### model.nGramMin : `int`
 Minimum n-gram size
 
 <a name="Model+nGramMax"></a>
 
-### model.nGramMax : <code>number</code>
+### model.nGramMax : `int`
 Maximum n-gram size
 
 <a name="Model+minimumConfidence"></a>
 
-### model.minimumConfidence : <code>number</code>
+### model.minimumConfidence : `float`
 Minimum confidence required for predictions
 
 <a name="Model+vocabulary"></a>
 
-### model.vocabulary : <code>Vocabulary</code> \| <code>false</code>
+### model.vocabulary : `Vocabulary` \| `false`
 Vocabulary instance
 
 <a name="Model+data"></a>
 
-### model.data : <code>Object</code>
+### model.data : `Object`
 Model data
 
 <a name="Model+serialize"></a>
 
-### model.serialize() ⇒ <code>Object</code>
+### model.serialize() ⇒ `Object`
 Return the model in its current state for storing, including the configured
 n-gram min/max values, the minimum confidence required for predictions,
 the vocabulary as an array (if any, otherwise false), and an object literal
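A brief sketch of how `serialize()` could be used to persist and later restore a trained model. Storing the result with `JSON.stringify` is an assumption for illustration, not something this commit prescribes, and `Classifier` is assumed to be the default export of `src/classifier.js`.

```js
import Classifier from './src/classifier'

const classifier = new Classifier({ nGramMax: 2 })
classifier.train('Fast shipping and great support', 'positive')

// serialize() returns a plain object: the n-gram min/max, the minimum
// confidence, the vocabulary as an array (or false), and the training data
const stored = classifier.model.serialize()
const json = JSON.stringify(stored)

// Later: rebuild a classifier from the stored state, since the constructor
// accepts a plain config/model object as well as a Model instance
const restored = new Classifier(JSON.parse(json))
```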

docs/prediction.md

Lines changed: 4 additions & 4 deletions
@@ -3,15 +3,15 @@
 ## Prediction
 
 * [Prediction](#Prediction)
-* [.label](#Prediction+label) : <code>string</code>
-* [.confidence](#Prediction+confidence) : <code>number</code>
+* [.label](#Prediction+label) : `string`
+* [.confidence](#Prediction+confidence) : `number`
 
 <a name="Prediction+label"></a>
 
-### prediction.label : <code>string</code>
+### prediction.label : `string`
 Label of the prediction
 
 <a name="Prediction+confidence"></a>
 
-### prediction.confidence : <code>number</code>
+### prediction.confidence : `number`
 Confidence of the prediction

docs/vocabulary.md

Lines changed: 17 additions & 17 deletions
@@ -4,64 +4,64 @@
 
 * [Vocabulary](#Vocabulary)
 * [new Vocabulary(terms)](#new_Vocabulary_new)
-* [.size](#Vocabulary+size) : <code>number</code>
-* [.terms](#Vocabulary+terms) : <code>Array</code> \| <code>Set</code>
-* [.add(terms)](#Vocabulary+add) ⇒ <code>this</code>
-* [.remove(terms)](#Vocabulary+remove) ⇒ <code>this</code>
-* [.has(term)](#Vocabulary+has) ⇒ <code>bool</code>
-* [.indexOf(term)](#Vocabulary+indexOf) ⇒ <code>number</code>
+* [.size](#Vocabulary+size) : `number`
+* [.terms](#Vocabulary+terms) : `Array` \| `Set`
+* [.add(terms)](#Vocabulary+add) ⇒ `this`
+* [.remove(terms)](#Vocabulary+remove) ⇒ `this`
+* [.has(term)](#Vocabulary+has) ⇒ `bool`
+* [.indexOf(term)](#Vocabulary+indexOf) ⇒ `number`
 
 <a name="new_Vocabulary_new"></a>
 
 ### new Vocabulary(terms)
 
 | Param | Type |
 | --- | --- |
-| terms | <code>Array</code> \| <code>Set</code> |
+| terms | `Array` \| `Set` |
 
 <a name="Vocabulary+size"></a>
 
-### vocabulary.size : <code>number</code>
+### vocabulary.size : `number`
 Vocabulary size
 
 <a name="Vocabulary+terms"></a>
 
-### vocabulary.terms : <code>Array</code> \| <code>Set</code>
+### vocabulary.terms : `Array` \| `Set`
 Vocabulary terms
 
 <a name="Vocabulary+add"></a>
 
-### vocabulary.add(terms) ⇒ <code>this</code>
+### vocabulary.add(terms) ⇒ `this`
 Add one or more terms to the vocabulary
 
 | Param | Type |
 | --- | --- |
-| terms | <code>string</code> \| <code>Array</code> \| <code>Set</code> |
+| terms | `string` \| `Array` \| `Set` |
 
 <a name="Vocabulary+remove"></a>
 
-### vocabulary.remove(terms) ⇒ <code>this</code>
+### vocabulary.remove(terms) ⇒ `this`
 Remove one or more terms from the vocabulary
 
 | Param | Type |
 | --- | --- |
-| terms | <code>string</code> \| <code>Array</code> \| <code>Set</code> |
+| terms | `string` \| `Array` \| `Set` |
 
 <a name="Vocabulary+has"></a>
 
-### vocabulary.has(term) ⇒ <code>bool</code>
+### vocabulary.has(term) ⇒ `bool`
 Return whether the vocabulary contains a certain term
 
 | Param | Type |
 | --- | --- |
-| term | <code>string</code> |
+| term | `string` |
 
 <a name="Vocabulary+indexOf"></a>
 
-### vocabulary.indexOf(term) ⇒ <code>number</code>
+### vocabulary.indexOf(term) ⇒ `number`
 Return the index of a term in the vocabulary (returns -1 if not found)
 
 | Param | Type |
 | --- | --- |
-| term | <code>string</code> |
+| term | `string` |
 
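A short sketch of the Vocabulary API documented above. It assumes `Vocabulary` is the default export of a `src/vocabulary.js` module (that file is not part of this diff), and the terms are arbitrary examples; the indexes shown depend on insertion order.

```js
import Vocabulary from './src/vocabulary'

const vocabulary = new Vocabulary(['good', 'bad'])

vocabulary.add(['great', 'terrible'])

console.log(vocabulary.size)               // 4
console.log(vocabulary.has('great'))       // true
console.log(vocabulary.indexOf('great'))   // 2 (insertion order assumed)
console.log(vocabulary.indexOf('missing')) // -1

vocabulary.remove('terrible')
```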

src/classifier.js

Lines changed: 4 additions & 8 deletions
@@ -3,12 +3,12 @@ import Model from './model'
 import Prediction from './prediction'
 
 /**
- * @param {Model|Object} [model]
+ * @param {(Model|Object)} [model]
  * @param {int} [model.nGramMin=1] - Minimum n-gram size
  * @param {int} [model.nGramMax=1] - Maximum n-gram size
  * @param {(int|float)} [model.minimumConfidence=0.2] - Minimum confidence required for predictions
  * @param {(Array|Set|false)} [model.vocabulary=[]] - Terms mapped to indexes in the model data entries, set to false to store terms directly in the data entries
- * @param {int} [model.data={}] - Key-value store containing all training data
+ * @param {Object} [model.data={}] - Key-value store containing all training data
  * @constructor
  */
 class Classifier {
@@ -144,7 +144,6 @@ class Classifier {
    *
    * @param {string} input
    * @return {Array}
-   * @private
    */
  splitWords(input) {
    if (typeof input !== 'string') {
@@ -166,7 +165,6 @@ class Classifier {
    *
    * @param {(string|string[])} input
    * @return {Object}
-   * @private
    */
  tokenize(input) {
    let words = typeof input === 'string' ? this.splitWords(input) : input
@@ -210,9 +208,8 @@ class Classifier {
    * translated to their index in the vocabulary (adding all terms to
    * the vocabulary that do not already exist)
    *
-   * @param {object} tokens
-   * @return {object}
-   * @private
+   * @param {Object} tokens
+   * @return {Object}
    */
  vectorize(tokens) {
    if (!(tokens instanceof Object) || tokens.constructor !== Object) {
@@ -247,7 +244,6 @@ class Classifier {
    * @param {Object} v1
    * @param {Object} v2
    * @return {float}
-   * @private
    */
  cosineSimilarity(v1, v2) {
    if (!(v1 instanceof Object) || v1.constructor !== Object) {
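For context on what `cosineSimilarity(v1, v2)` computes over these `{ index: count }` vector objects, here is an illustrative standalone sketch of the formula, not the library's actual implementation:

```js
// Illustrative only: cosine similarity between two sparse vectors
// represented as plain objects mapping term index to occurrence count
function cosineSimilarity(v1, v2) {
  let dotProduct = 0
  let norm1 = 0
  let norm2 = 0

  for (const key of Object.keys(v1)) {
    norm1 += v1[key] * v1[key]

    if (key in v2) {
      dotProduct += v1[key] * v2[key]
    }
  }

  for (const key of Object.keys(v2)) {
    norm2 += v2[key] * v2[key]
  }

  // Empty vectors (or vectors with no shared terms) yield 0
  if (norm1 === 0 || norm2 === 0) {
    return 0
  }

  return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2))
}

console.log(cosineSimilarity({ 0: 1, 1: 2 }, { 0: 1, 2: 1 })) // ≈ 0.316
```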

src/model.js

Lines changed: 3 additions & 3 deletions
@@ -74,7 +74,7 @@ class Model {
   /**
    * Minimum n-gram size
    *
-   * @type {number}
+   * @type {int}
    */
  get nGramMin() {
    return this._nGramMin
@@ -91,7 +91,7 @@ class Model {
   /**
    * Maximum n-gram size
    *
-   * @type {number}
+   * @type {int}
    */
  get nGramMax() {
    return this._nGramMax
@@ -108,7 +108,7 @@ class Model {
   /**
    * Minimum confidence required for predictions
    *
-   * @type {number}
+   * @type {float}
    */
  get minimumConfidence() {
    return this._minimumConfidence
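To round off the Model-level changes, a small sketch of constructing a `Model` directly and handing it to a `Classifier`, which the README notes is possible. The import paths assume default exports from `src/model.js` and `src/classifier.js`.

```js
import Model from './src/model'
import Classifier from './src/classifier'

// The same options accepted by Classifier can be passed straight to Model
const model = new Model({
  nGramMin: 1,
  nGramMax: 3,
  minimumConfidence: 0.3
})

console.log(model.nGramMin)          // 1
console.log(model.nGramMax)          // 3
console.log(model.minimumConfidence) // 0.3

// A Model instance is a valid argument to the Classifier constructor
const classifier = new Classifier(model)
```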
