Skip to content

Commit d434348

Browse files
committed
Bugfixes
2 parents db5bed3 + 7b21650 commit d434348

File tree

5 files changed

+33
-37
lines changed

5 files changed

+33
-37
lines changed

Manifest.toml

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ version = "0.2.0"
99
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
1010

1111
[[BinaryProvider]]
12-
deps = ["Libdl", "Pkg", "SHA", "Test"]
13-
git-tree-sha1 = "055eb2690182ebc31087859c3dd8598371d3ef9e"
12+
deps = ["Libdl", "SHA"]
13+
git-tree-sha1 = "c7361ce8a2129f20b0e05a89f7070820cfed6648"
1414
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
15-
version = "0.5.3"
15+
version = "0.5.4"
1616

1717
[[DataStructures]]
1818
deps = ["InteractiveUtils", "OrderedCollections", "Random", "Serialization", "Test"]
@@ -48,9 +48,6 @@ git-tree-sha1 = "fc6ee05e35074a66dc12a716065a25d9deece6fb"
4848
uuid = "8ef0a80b-9436-5d2c-a485-80b904378c43"
4949
version = "0.4.2"
5050

51-
[[LibGit2]]
52-
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
53-
5451
[[Libdl]]
5552
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
5653

@@ -70,22 +67,14 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804"
7067

7168
[[OrderedCollections]]
7269
deps = ["Random", "Serialization", "Test"]
73-
git-tree-sha1 = "85619a3f3e17bb4761fe1b1fd47f0e979f964d5b"
70+
git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
7471
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
75-
version = "1.0.2"
76-
77-
[[Pkg]]
78-
deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
79-
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
72+
version = "1.1.0"
8073

8174
[[Printf]]
8275
deps = ["Unicode"]
8376
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
8477

85-
[[REPL]]
86-
deps = ["InteractiveUtils", "Markdown", "Sockets"]
87-
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
88-
8978
[[Random]]
9079
deps = ["Serialization"]
9180
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -117,10 +106,6 @@ version = "0.3.0"
117106
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
118107
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
119108

120-
[[UUIDs]]
121-
deps = ["Random", "SHA"]
122-
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
123-
124109
[[Unicode]]
125110
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
126111

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
## StringAnalysis Release Notes
22

3+
v0.3.6
4+
------
5+
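- Bugfix release: `prepare` for strings now takes a `language` keyword (previously `lang`) and passes it on to `stem`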
6+
37
v0.3.5
48
------
59
- Improved LSA embedding performance

Project.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "StringAnalysis"
22
uuid = "b66b7d2f-f536-51df-9f97-4dfb9d27c005"
33
authors = ["Corneliu Cofaru <[email protected]>"]
4-
version = "0.3.5"
4+
version = "0.3.6"
55

66
[deps]
77
AutoHashEquals = "15f4f7f2-30c1-5605-9d31-71845cf9641f"
@@ -18,3 +18,6 @@ TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e"
1818
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1919
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
2020
WordTokenizers = "796a5d58-b03d-544a-977e-18100b691f6e"
21+
22+
[compat]
23+
julia = "1"

docs/src/examples.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,11 @@ using Languages
113113
it = StringDocument("Quest'e un piccolo esempio di come si puo fare l'analisi");
114114
StringAnalysis.language!(it, Languages.Italian());
115115
prepare!(it, strip_articles|strip_prepositions|strip_whitespace);
116-
it.text
116+
text(it)
117+
```
118+
For `AbstractString` inputs, the language must be specified explicitly through the `language` keyword argument:
119+
```@repl index
120+
prepare("Nous sommes tous d'accord avec les examples!", stem_words, language=Languages.French())
117121
```
118122

119123
## Features

src/preprocessing.jl

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -354,16 +354,16 @@ function prepare!(entity, # can be an AbstractDocument or Corpus
354354
((flags & strip_single_chars) > 0) && push!(rpatterns, strip2regex[strip_single_chars])
355355
end
356356
# known words
357-
lang = _language(entity)
357+
language = _language(entity)
358358
if (flags & strip_articles) > 0
359-
union!(skip_words, articles(lang))
359+
union!(skip_words, articles(language))
360360
else
361-
((flags & strip_indefinite_articles) > 0) && union!(skip_words, indefinite_articles(lang))
362-
((flags & strip_definite_articles) > 0) && union!(skip_words, definite_articles(lang))
361+
((flags & strip_indefinite_articles) > 0) && union!(skip_words, indefinite_articles(language))
362+
((flags & strip_definite_articles) > 0) && union!(skip_words, definite_articles(language))
363363
end
364-
((flags & strip_prepositions) > 0) && union!(skip_words, prepositions(lang))
365-
((flags & strip_pronouns) > 0) && union!(skip_words, pronouns(lang))
366-
((flags & strip_stopwords) > 0) && union!(skip_words, stopwords(lang))
364+
((flags & strip_prepositions) > 0) && union!(skip_words, prepositions(language))
365+
((flags & strip_pronouns) > 0) && union!(skip_words, pronouns(language))
366+
((flags & strip_stopwords) > 0) && union!(skip_words, stopwords(language))
367367
if !isempty(skip_words)
368368
push!(rpatterns, _build_words_pattern(skip_words))
369369
end
@@ -386,7 +386,7 @@ end
386386

387387
function prepare(s::AbstractString,
388388
flags::UInt32;
389-
lang::Language = DEFAULT_LANGUAGE,
389+
language::Language = DEFAULT_LANGUAGE,
390390
skip_patterns = Vector{Regex}(),
391391
skip_words = Vector{AbstractString}())
392392
os = s # Initialize output string
@@ -407,14 +407,14 @@ function prepare(s::AbstractString,
407407
end
408408
# known words
409409
if (flags & strip_articles) > 0
410-
union!(skip_words, articles(lang))
410+
union!(skip_words, articles(language))
411411
else
412-
((flags & strip_indefinite_articles) > 0) && union!(skip_words, indefinite_articles(lang))
413-
((flags & strip_definite_articles) > 0) && union!(skip_words, definite_articles(lang))
412+
((flags & strip_indefinite_articles) > 0) && union!(skip_words, indefinite_articles(language))
413+
((flags & strip_definite_articles) > 0) && union!(skip_words, definite_articles(language))
414414
end
415-
((flags & strip_prepositions) > 0) && union!(skip_words, prepositions(lang))
416-
((flags & strip_pronouns) > 0) && union!(skip_words, pronouns(lang))
417-
((flags & strip_stopwords) > 0) && union!(skip_words, stopwords(lang))
415+
((flags & strip_prepositions) > 0) && union!(skip_words, prepositions(language))
416+
((flags & strip_pronouns) > 0) && union!(skip_words, pronouns(language))
417+
((flags & strip_stopwords) > 0) && union!(skip_words, stopwords(language))
418418
if !isempty(skip_words)
419419
push!(rpatterns, _build_words_pattern(skip_words))
420420
end
@@ -431,6 +431,6 @@ function prepare(s::AbstractString,
431431
os = remove_patterns(os, r)
432432
end
433433
# Stemming
434-
((flags & stem_words) > 0) && (os = stem(os))
434+
((flags & stem_words) > 0) && (os = stem(os, language=language))
435435
return os
436436
end

0 commit comments

Comments
 (0)