andreekeberg
diff --git a/‎.babelrc‎
Lines changed: 21 additions & 0 deletions b/‎.babelrc‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎.editorconfig‎
Lines changed: 15 additions & 0 deletions b/‎.editorconfig‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎.eslintrc‎
Lines changed: 19 additions & 0 deletions b/‎.eslintrc‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 18 additions & 0 deletions b/‎.gitignore‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎.npmignore‎
Lines changed: 15 additions & 0 deletions b/‎.npmignore‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎.travis.yml‎
Lines changed: 10 additions & 0 deletions b/‎.travis.yml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 9 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 41 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 191 additions & 0 deletions b/‎README.md‎
Lines changed: 191 additions & 0 deletions
@@ -0,0 +1,21 @@
+{
+  "env": {
+    "development": {
+      "presets": [
+        ["@babel/env"]
+      ],
+      "plugins": [
+        "add-module-exports"
+      ]
+    },
+    "production": {
+      "presets": [
+        ["@babel/env"],
+        "minify"
+      ],
+      "plugins": [
+        "add-module-exports"
+      ]
+    }
+  }
+}
@@ -0,0 +1,15 @@
+# EditorConfig helps developers define and maintain
+# consistent coding styles between different editors and IDEs.
+
+root = true
+
+[*]
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+indent_style = space
+indent_size = 4
+
+[*.md]
+trim_trailing_whitespace = false
@@ -0,0 +1,19 @@
+{
+  "parserOptions": {
+    "ecmaVersion": 9,
+    "sourceType": "module"
+  },
+  "rules": {
+    "semi": ["warn", "never"],
+    "no-mixed-spaces-and-tabs": "warn",
+    "indent": [
+      "warn"
+    ],
+    "max-statements-per-line": [
+      "warn",
+      {
+        "max": 2
+      }
+    ]
+  }
+}
@@ -0,0 +1,18 @@
+logs
+*.log
+npm-debug.log*
+pids
+*.pid
+*.seed
+lib-cov
+coverage
+.nyc_output
+node_modules
+jspm_packages
+.npm
+.node_repl_history
+.idea
+lib
+package-lock.json
+yarn.lock
+.DS_Store
@@ -0,0 +1,15 @@
+*.log
+npm-debug.log*
+coverage
+.nyc_output
+node_modules
+package-lock.json
+yarn.lock
+src
+test
+CHANGELOG.md
+.travis.yml
+.editorconfig
+.eslintrc
+.babelrc
+.gitignore
@@ -0,0 +1,10 @@
+language: node_js
+node_js:
+  - '8'
+  - '6'
+script:
+  - npm run test
+  - npm run build
+branches:
+  only:
+    - master
@@ -0,0 +1,9 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+## [1.0.0] - 2020-08-26
+
+Initial release
+
+[1.0.0]: https://github.com/andreekeberg/ml-classify-text-js/releases/tag/1.0.0
@@ -0,0 +1,41 @@
+# Contributing to JavaScript Text Classifier
+
+This document contains basic guidelines to make contributing to this project as easy and transparent as possible, whether it's:
+
+- Reporting a bug
+- Discussing the current state of the code
+- Submitting a fix
+- Proposing new features
+- Becoming a maintainer
+
+## Pull requests are actively welcomed
+
+1. Fork the repo and create your branch from `master`.
+2. If you've added code that should be tested, add tests.
+3. If you've changed APIs, update the documentation.
+4. Make sure your code lints.
+5. Issue your pull request.
+
+## Any contributions you make will be under the MIT Software License
+
+In short, when you submit code changes, your submissions are understood to be under the same [MIT License](http://choosealicense.com/licenses/mit/) that covers the project.
+
+## Report bugs using [issues](https://github.com/andreekeberg/ml-classify-text-js/issues)
+
+All public bugs are tracked using GitHub issues. Report a bug by [opening a new issue](https://github.com/andreekeberg/ml-classify-text-js/issues/new); it's that easy!
+
+## Write bug reports with detail, background, and sample code
+
+**Great bug reports** tend to have:
+
+- A quick summary and/or background
+- Steps to reproduce
+  - Be specific!
+  - Give sample code if you can.
+  - What you expected would happen
+  - What actually happens
+- Notes (possibly including why you think this might be happening, or stuff you tried that didn't work)
+
+## License
+
+By contributing, you agree that your contributions will be licensed under the project's MIT License.
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2020 André Ekeberg <[email protected]>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,191 @@
+# 📄 JavaScript Text Classifier
+
+Use machine learning to classify text using [n-grams](https://en.wikipedia.org/wiki/N-gram) and [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity).
+
+A minimal library, usable both in the **browser** and in **Node.js**, that allows you to train a model with a large number of text samples (and corresponding labels), and then use this model to quickly predict one or more appropriate labels for new text samples.
+
+## Installation
+
+**Using npm**
+
+```
+npm install ml-classify-text
+```
+
+**Using yarn**
+
+```
+yarn add ml-classify-text
+```
+
+## Getting started
+
+**Import as an ES6 module**
+
+```javascript
+import Classifier from 'ml-classify-text'
+```
+
+**Import as a CommonJS module**
+
+```javascript
+const { Classifier } = require('ml-classify-text')
+```
+
+## Basic usage
+
+### Setting up a new Classifier instance
+
+```javascript
+const classifier = new Classifier()
+```
+
+### Training a model
+
+```javascript
+let positive = [
+    'This is great, so cool!',
+    'Wow, I love it!',
+    'It really is amazing',
+]
+
+let negative = [
+    'This is really bad',
+    'I hate it with a passion',
+    'Just terrible!',
+]
+
+classifier.train(positive, 'positive')
+classifier.train(negative, 'negative')
+```
+
+### Getting a prediction
+
+```javascript
+let predictions = classifier.predict('It sure is pretty great!')
+
+if (predictions.length) {
+	predictions.forEach(prediction => {
+		console.log(`${prediction.label} (${prediction.confidence})`)
+	})
+} else {
+	console.log('No predictions returned')
+}
+```
+
+Returning:
+
+```
+positive (0.5423261445466404)
+```
+
+## Advanced usage
+
+### Configuration
+
+The following configuration options can be passed either directly to a new [Model](docs/model.md), or indirectly by passing them to the [Classifier](docs/classifier.md) constructor.
+
+#### Options
+
+| Property | Description | Default |
+| --- | --- | --- |
+| **nGramMin** | Minimum n-gram size | `1` |
+| **nGramMax** | Maximum n-gram size | `1` |
+| **minimumConfidence** | Minimum confidence required for predictions | `0.2` |
+| **vocabulary** | Terms mapped to indexes in the model data entries, set to `false` to store terms directly in the data entries | `[]` |
+| **data** | Object literal containing all training data | `{}` |
+
+### Using n-grams
+
+The default behavior is to split up texts by single words (known as a [bag of words](https://en.wikipedia.org/wiki/Bag-of-words_model), or unigrams).
+
+This has a few limitations, since by ignoring the order of words, it's impossible to correctly match phrases and expressions.
+
+This is where [n-grams](https://en.wikipedia.org/wiki/N-gram) come in. When set to use more than one word per term, they act like a sliding window that moves across the text — a continuous sequence of words of the specified amount — which can greatly improve the accuracy of predictions.
+
+#### Example of using n-grams with a size of 2 (bigrams)
+
+```javascript
+const classifier = new Classifier({
+	nGramMin: 2,
+	nGramMax: 2
+})
+
+let tokens = classifier.tokenize('I really dont like it')
+
+console.log(tokens)
+```
+
+Returning:
+
+```javascript
+{
+	'i really': 1,
+	'really dont': 1,
+	'dont like': 1,
+	'like it': 1
+}
+```
+
+### Serializing a model
+
+After training a model with large sets of data, you'll want to store all this data, so that you can simply set up a new model using this training data at another time, and quickly make predictions.
+
+To do this, simply use the `serialize` method on your [Model](docs/model.md), and either save the data structure to a file, send it to a server, or store it in any other way you want.
+
+```javascript
+let model = classifier.model
+
+console.log(model.serialize())
+```
+
+Returning:
+
+```
+{
+    nGramMin: 1,
+    nGramMax: 1,
+    minimumConfidence: 0.2,
+    vocabulary: [
+    	'this',    'is',      'great',
+    	'so',      'cool',    'wow',
+    	'i',       'love',    'it',
+    	'really',  'amazing', 'bad',
+    	'hate',    'with',    'a',
+    	'passion', 'just',    'terrible'
+    ],
+    data: {
+        positive: {
+            '0': 1, '1': 2, '2': 1,
+            '3': 1, '4': 1, '5': 1,
+            '6': 1, '7': 1, '8': 2,
+            '9': 1, '10': 1
+        },
+        negative: {
+            '0': 1, '1': 1, '6': 1,
+            '8': 1, '9': 1, '11': 1,
+            '12': 1, '13': 1, '14': 1,
+            '15': 1, '16': 1, '17': 1
+        }
+    }
+}
+```
+
+## Documentation
+
+* [Classifier](docs/classifier.md)
+* [Model](docs/model.md)
+* [Vocabulary](docs/vocabulary.md)
+* [Prediction](docs/prediction.md)
+
+## Contributing
+
+Read the [contribution guidelines](CONTRIBUTING.md).
+
+## Changelog
+
+Refer to the [changelog](CHANGELOG.md) for a full history of the project.
+
+## License
+
+JavaScript Text Classifier is licensed under the [MIT license](LICENSE).