Commit 4d6aa9e

Settings:
1. Update Settings - Files - Miscellaneous Settings - Read files in chunks of characters
2. Update Settings - Sentiment Analysis - Preview
1 parent 41c748f commit 4d6aa9e

File tree: 58 files changed, +1792 −1458 lines


.circleci/config.yml

Lines changed: 6 additions & 0 deletions
@@ -52,7 +52,9 @@ jobs:
 command: |
   # Run some tests separately to reduce memory usage
   pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+  pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
   pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+  pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
   pytest tests/tests_nlp/test_dependency_parsing.py
   pytest tests/tests_nlp/test_lemmatization.py
@@ -121,7 +123,9 @@ jobs:
 command: |
   # Run some tests separately to reduce memory usage
   pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+  pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
   pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+  pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
   pytest tests/tests_nlp/test_dependency_parsing.py
   pytest tests/tests_nlp/test_lemmatization.py
@@ -194,7 +198,9 @@ jobs:
 # Use "python -m pytest" instead to fix "pytest: command not found"
 # Run some tests separately to reduce memory usage
 python3.11 -m pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+python3.11 -m pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
 python3.11 -m pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+python3.11 -m pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
 python3.11 -m pytest tests/tests_nlp/test_dependency_parsing.py
 python3.11 -m pytest tests/tests_nlp/test_lemmatization.py
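The pattern above (one pytest invocation per heavyweight test file) reduces memory usage because each invocation is a separate OS process: the spaCy and Stanza models a run loads are released when that process exits, so peak memory stays bounded. A minimal sketch of the same idea, not taken from the repository:

import subprocess

# Hypothetical driver mirroring the CI scripts above: running each test
# file in its own pytest process frees the NLP models it loaded as soon
# as that process exits, keeping peak memory bounded
test_files = (
    'tests/tests_nlp/tests_spacy/test_spacy_eng.py',
    'tests/tests_nlp/tests_spacy/test_spacy_misc.py',
    'tests/tests_nlp/tests_stanza/test_stanza_eng.py',
    'tests/tests_nlp/tests_stanza/test_stanza_misc.py'
)

for test_file in test_files:
    # check = True aborts on the first failing test file, like "set -e"
    subprocess.run(['pytest', test_file], check = True)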

.github/workflows/tests.yml

Lines changed: 6 additions & 0 deletions
@@ -51,7 +51,9 @@ jobs:
 run: |
   # Run some tests separately to reduce memory usage
   pytest --cov=./ --cov-report=xml --cov-append tests/tests_nlp/tests_spacy/test_spacy_eng.py
+  pytest --cov=./ --cov-report=xml --cov-append tests/tests_nlp/tests_spacy/test_spacy_misc.py
   pytest --cov=./ --cov-report=xml --cov-append tests/tests_nlp/tests_stanza/test_stanza_eng.py
+  pytest --cov=./ --cov-report=xml --cov-append tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
   pytest --cov=./ --cov-report=xml --cov-append tests/tests_nlp/test_dependency_parsing.py
   pytest --cov=./ --cov-report=xml --cov-append tests/tests_nlp/test_lemmatization.py
@@ -113,7 +115,9 @@ jobs:
 run: |
   # Run some tests separately to reduce memory usage
   pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+  pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
   pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+  pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
   pytest tests/tests_nlp/test_dependency_parsing.py
   pytest tests/tests_nlp/test_lemmatization.py
@@ -173,7 +177,9 @@
 
 # Run some tests separately to reduce memory usage
 pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
 pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
 pytest tests/tests_nlp/test_dependency_parsing.py
 pytest tests/tests_nlp/test_lemmatization.py
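Note that the coverage job above passes --cov-append on every invocation: pytest-cov would otherwise start a fresh coverage data file per run, and appending is what lets the separately executed test files accumulate into the single XML report that gets uploaded.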

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -30,6 +30,8 @@
 - Work Area: Add Work Area - Sample
 
 ### ✨ Improvements
+- Settings: Update Settings - Files - Miscellaneous Settings - Read files in chunks of characters
+- Settings: Update Settings - Sentiment Analysis - Preview
 - Utils: Update Stanza's Serbian (Latin script) sentence tokenizer, part-of-speech tagger, and dependency parser
 - Utils: Update Wordless's sentence splitter and sentence segment tokenizer
 - Work Area: Parallel Concordancer - Parallel Unit No. - Empty parallel units are not counted
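The first changelog entry refers to the setting that controls how many characters are read from a file at a time. As a rough illustration of the idea only (this is not Wordless's actual implementation; the function below is a hypothetical sketch, though the chunk size of 99 reappears in this commit's tests):

def read_in_chunks(file_path, chunk_size_chars = 99):
    # Yield successive chunks of at most chunk_size_chars characters so
    # that arbitrarily large files never have to sit in memory whole
    with open(file_path, 'r', encoding = 'utf-8') as f:
        while chunk := f.read(chunk_size_chars):
            yield chunk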

appveyor.yml

Lines changed: 6 additions & 0 deletions
@@ -63,7 +63,9 @@ for:
 test_script:
   # Run some tests separately to reduce memory usage
   - pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+  - pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
   - pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+  - pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
   - pytest tests/tests_nlp/test_dependency_parsing.py
   - pytest tests/tests_nlp/test_lemmatization.py
@@ -112,7 +114,9 @@ for:
 test_script:
   # Run some tests separately to reduce memory usage
   - pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+  - pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
   - pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+  - pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
   - pytest tests/tests_nlp/test_dependency_parsing.py
   - pytest tests/tests_nlp/test_lemmatization.py
@@ -164,7 +168,9 @@ for:
 
 # Run some tests separately to reduce memory usage
 - pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+- pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
 - pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+- pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
 - pytest tests/tests_nlp/test_dependency_parsing.py
 - pytest tests/tests_nlp/test_lemmatization.py

azure-pipelines.yml

Lines changed: 6 additions & 0 deletions
@@ -52,7 +52,9 @@ jobs:
 - script: |
     :: Run some tests separately to reduce memory usage
     pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+    pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
     pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+    pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
     pytest tests/tests_nlp/test_dependency_parsing.py
     pytest tests/tests_nlp/test_lemmatization.py
@@ -113,7 +115,9 @@ jobs:
 - script: |
     # Run some tests separately to reduce memory usage
     pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+    pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
     pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+    pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
     pytest tests/tests_nlp/test_dependency_parsing.py
     pytest tests/tests_nlp/test_lemmatization.py
@@ -177,7 +181,9 @@
 
 # Run some tests separately to reduce memory usage
 pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
+pytest tests/tests_nlp/tests_spacy/test_spacy_misc.py
 pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
+pytest tests/tests_nlp/tests_stanza/test_stanza_misc.py
 
 pytest tests/tests_nlp/test_dependency_parsing.py
 pytest tests/tests_nlp/test_lemmatization.py

tests/tests_measures/test_measures_readability.py

Lines changed: 4 additions & 4 deletions
@@ -173,8 +173,8 @@ def test_colemans_readability_formula():
     assert cloze_pct_eng_0 == 'text_too_short'
     assert cloze_pct_eng_12_1 == 1.29 * (9 / 12 * 100) - 38.45
     assert cloze_pct_eng_12_2 == 1.16 * (9 / 12 * 100) + 1.48 * (3 / 12 * 100) - 37.95
-    assert cloze_pct_eng_12_3 == 1.07 * (9 / 12 * 100) + 1.18 * (3 / 12 * 100) + 0.76 * (0 / 12 * 100) - 34.02
-    assert cloze_pct_eng_12_4 == 1.04 * (9 / 12 * 100) + 1.06 * (3 / 12 * 100) + 0.56 * (0 / 12 * 100) - 0.36 * (0 / 12) - 26.01
+    assert cloze_pct_eng_12_3 == 1.07 * (9 / 12 * 100) + 1.18 * (3 / 12 * 100) + 0.76 * (3 / 12 * 100) - 34.02
+    assert cloze_pct_eng_12_4 == 1.04 * (9 / 12 * 100) + 1.06 * (3 / 12 * 100) + 0.56 * (3 / 12 * 100) - 0.36 * (0 / 12) - 26.01
     assert cloze_pct_tha_12 != 'no_support'
     assert cloze_pct_other_12 == 'no_support'
 
@@ -427,8 +427,8 @@ def test_lorge_readability_index():
     lorge_spa_12 = wl_measures_readability.lorge_readability_index(main, test_text_spa_12)
 
     assert lorge_eng_0 == 'text_too_short'
-    assert lorge_eng_12_corrected == 12 / 3 * 0.06 + 2 / 12 * 0.1 + 2 / 12 * 0.1 + 1.99
-    assert lorge_eng_12 == 12 / 3 * 0.07 + 2 / 12 * 13.01 + 2 / 12 * 10.73 + 1.6126
+    assert lorge_eng_12_corrected == 12 / 3 * 0.06 + 0 / 12 * 0.1 + 2 / 12 * 0.1 + 1.99
+    assert lorge_eng_12 == 12 / 3 * 0.07 + 0 / 12 * 13.01 + 2 / 12 * 10.73 + 1.6126
     assert lorge_spa_12 == 'no_support'
 
 def test_luong_nguyen_dinhs_readability_formula():
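The corrected Coleman expectations above can be cross-checked directly; this snippet is illustrative and not part of the commit, and simply evaluates the diff's own expressions, where the third variable changes from 0 / 12 to 3 / 12:

cloze_pct_eng_12_3 = 1.07 * (9 / 12 * 100) + 1.18 * (3 / 12 * 100) + 0.76 * (3 / 12 * 100) - 34.02
cloze_pct_eng_12_4 = 1.04 * (9 / 12 * 100) + 1.06 * (3 / 12 * 100) + 0.56 * (3 / 12 * 100) - 0.36 * (0 / 12) - 26.01

print(round(cloze_pct_eng_12_3, 2)) # 80.25 + 29.5 + 19.0 - 34.02 = 94.73
print(round(cloze_pct_eng_12_4, 2)) # 78.0 + 26.5 + 14.0 - 0.0 - 26.01 = 92.49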

tests/tests_nlp/test_dependency_parsing.py

Lines changed: 61 additions & 47 deletions
@@ -24,6 +24,7 @@
 )
 from wordless.wl_nlp import (
     wl_dependency_parsing,
+    wl_nlp_utils,
     wl_texts,
     wl_word_tokenization
 )
@@ -47,10 +48,14 @@ def test_dependency_parse(lang, dependency_parser):
         lang = lang
     )
 
-    wl_test_dependency_parse_models(lang, dependency_parser, test_sentence, tokens, '')
-    wl_test_dependency_parse_fig_models(lang, dependency_parser, test_sentence, tokens)
+    wl_test_dependency_parse_models(lang, dependency_parser, tokens, '')
+    wl_test_dependency_parse_fig_models(lang, dependency_parser, tokens)
+
+def wl_test_dependency_parse_models(lang, dependency_parser, tokens, results):
+    print(f'{lang} / {dependency_parser}:')
+
+    test_sentence = getattr(wl_test_lang_examples, f'SENTENCE_{lang.upper()}')
 
-def wl_test_dependency_parse_models(lang, dependency_parser, test_sentence, tokens, results):
     # Untokenized
     tokens_untokenized = wl_dependency_parsing.wl_dependency_parse(
         main,
@@ -63,7 +68,6 @@ def wl_test_dependency_parse_models(lang, dependency_parser, toke
         for token in tokens_untokenized
     ]
 
-    print(f'{lang} / {dependency_parser}:')
     print(f'{dependencies_untokenized}\n')
 
     # Tokenized
@@ -92,6 +96,16 @@ def wl_test_dependency_parse_models(lang, dependency_parser, test_sentence, toke
     # Tokenization should not be modified
     assert len(tokens) == len(dependencies_tokenized)
 
+    # Newlines
+    tokens_newlines = wl_dependency_parsing.wl_dependency_parse(
+        main,
+        inputs = wl_test_lang_examples.TEXT_NEWLINES,
+        lang = lang,
+        dependency_parser = dependency_parser
+    )
+
+    assert wl_texts.to_token_texts(tokens_newlines) == wl_nlp_utils.clean_texts(wl_test_lang_examples.TEXT_NEWLINES)
+
     # Tagged
     main.settings_custom['files']['tags']['body_tag_settings'] = [['Embedded', 'Part of speech', '_*', 'N/A']]
 
@@ -108,16 +122,6 @@ def wl_test_dependency_parse_models(lang, dependency_parser, test_sentence, toke
 
     assert dependencies_tagged == dependencies_tokenized
 
-    # Long
-    tokens_long = wl_dependency_parsing.wl_dependency_parse(
-        main,
-        inputs = wl_texts.to_tokens(wl_test_lang_examples.TOKENS_LONG, lang = lang),
-        lang = lang,
-        dependency_parser = dependency_parser
-    )
-
-    assert [str(token) for token in tokens_long] == wl_test_lang_examples.TOKENS_LONG
-
     # Parsed
     heads_orig = ['test_head']
     tokens_parsed = wl_dependency_parsing.wl_dependency_parse(
@@ -129,72 +133,82 @@ def wl_test_dependency_parse_models(lang, dependency_parser, test_sentence, toke
 
     assert [str(token.head) for token in tokens_parsed] == heads_orig
 
-def wl_test_dependency_parse_fig_models(lang, dependency_parser, test_sentence, tokens):
+    # Long
+    if dependency_parser.startswith(('spacy_', 'stanza_')):
+        main.settings_custom['files']['misc_settings']['read_files_in_chunks_chars'] = 99
+
+        tokens_long = wl_dependency_parsing.wl_dependency_parse(
+            main,
+            inputs = '\n'.join(wl_test_lang_examples.TOKENS_LONG),
+            lang = lang,
+            dependency_parser = dependency_parser
+        )
+
+        assert wl_texts.to_token_texts(tokens_long) == wl_test_lang_examples.TOKENS_LONG
+
+        tokens_long = wl_dependency_parsing.wl_dependency_parse(
+            main,
+            inputs = wl_texts.to_tokens(wl_test_lang_examples.TOKENS_LONG, lang = lang),
+            lang = lang,
+            dependency_parser = dependency_parser
+        )
+
+        assert wl_texts.to_token_texts(tokens_long) == wl_test_lang_examples.TOKENS_LONG
+
+        main.settings_custom['files']['misc_settings']['read_files_in_chunks_chars'] = main.settings_default['files']['misc_settings']['read_files_in_chunks_chars']
+
+def wl_test_dependency_parse_fig_models(lang, dependency_parser, tokens):
     print(f'{lang} / {dependency_parser} (figure)')
 
+    test_sentence = getattr(wl_test_lang_examples, f'SENTENCE_{lang.upper()}')
+
     # Untokenized
     html_untokenized = wl_dependency_parsing.wl_dependency_parse_fig(
         main,
         inputs = test_sentence,
         lang = lang,
-        dependency_parser = dependency_parser,
-        show_in_separate_tabs = False
-    )
-    html_untokenized_separate_tabs = wl_dependency_parsing.wl_dependency_parse_fig(
-        main,
-        inputs = test_sentence,
-        lang = lang,
-        dependency_parser = dependency_parser,
-        show_in_separate_tabs = True
+        dependency_parser = dependency_parser
     )
 
     # Tokenized
     html_tokenized = wl_dependency_parsing.wl_dependency_parse_fig(
         main,
-        inputs = tokens,
-        lang = lang,
-        dependency_parser = dependency_parser,
-        show_in_separate_tabs = False
-    )
-    html_tokenized_separate_tabs = wl_dependency_parsing.wl_dependency_parse_fig(
-        main,
-        inputs = tokens,
+        inputs = [tokens],
         lang = lang,
-        dependency_parser = dependency_parser,
-        show_in_separate_tabs = True
+        dependency_parser = dependency_parser
     )
 
     # Check for empty HTMLs
     assert html_untokenized
-    assert html_untokenized_separate_tabs
     assert html_tokenized
-    assert html_tokenized_separate_tabs
 
 # RTL languages
 def test_dependency_parse_fig_rtl_langs():
-    html = wl_dependency_parsing.wl_dependency_parse_fig(
+    html_untokenized = wl_dependency_parsing.wl_dependency_parse_fig(
         main,
         inputs = 'test',
         lang = 'ara'
     )
-    html = wl_dependency_parsing.wl_dependency_parse_fig(
+
+    html_tokenized = wl_dependency_parsing.wl_dependency_parse_fig(
         main,
-        inputs = [wl_texts.Wl_Token('test')],
+        inputs = [[wl_texts.Wl_Token('test', lang = 'ara')]],
         lang = 'ara'
     )
 
-    assert html
+    assert html_untokenized
+    assert html_tokenized
 
-def test__get_pipelines_disabled():
-    wl_dependency_parsing._get_pipelines_disabled(show_pos_tags = True, show_lemmas = True)
-    wl_dependency_parsing._get_pipelines_disabled(show_pos_tags = True, show_lemmas = False)
-    wl_dependency_parsing._get_pipelines_disabled(show_pos_tags = False, show_lemmas = True)
-    wl_dependency_parsing._get_pipelines_disabled(show_pos_tags = False, show_lemmas = False)
+def test__get_pipelines_to_disable():
+    wl_dependency_parsing._get_pipelines_to_disable(show_pos_tags = True, show_lemmas = True)
+    wl_dependency_parsing._get_pipelines_to_disable(show_pos_tags = True, show_lemmas = False)
+    wl_dependency_parsing._get_pipelines_to_disable(show_pos_tags = False, show_lemmas = True)
+    wl_dependency_parsing._get_pipelines_to_disable(show_pos_tags = False, show_lemmas = False)
 
 def test_wl_show_dependency_graphs():
     htmls = wl_dependency_parsing.wl_dependency_parse_fig(
         main,
-        inputs = 'test',
+        inputs = wl_test_lang_examples.TEXT_NEWLINES,
        lang = 'eng_us',
         dependency_parser = 'stanza_eng'
     )
@@ -207,5 +221,5 @@ def test_wl_show_dependency_graphs():
     test_dependency_parse(lang, dependency_parser)
 
 test_dependency_parse_fig_rtl_langs()
-test__get_pipelines_disabled()
+test__get_pipelines_to_disable()
 test_wl_show_dependency_graphs()
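The "Long" block added above feeds a newline-joined token list through the parser with read_files_in_chunks_chars forced down to 99, then checks that the tokens come back unchanged. The invariant it guards can be sketched in isolation (chunk_text below is a hypothetical stand-in, not the repository's helper):

def chunk_text(text, chunk_size_chars = 99):
    # Split on line boundaries so no token is ever cut in half mid-chunk
    chunks = []
    chunk = ''

    for line in text.splitlines(keepends = True):
        if chunk and len(chunk) + len(line) > chunk_size_chars:
            chunks.append(chunk)
            chunk = ''

        chunk += line

    if chunk:
        chunks.append(chunk)

    return chunks

tokens_long = [f'token_{i}' for i in range(1000)]
# Re-tokenizing the chunks must recover the original tokens exactly
assert [token for chunk in chunk_text('\n'.join(tokens_long)) for token in chunk.split()] == tokens_long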
