diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb
index 9499118120..661f108013 100644
--- a/lib/rdoc/markup/to_html.rb
+++ b/lib/rdoc/markup/to_html.rb
@@ -42,6 +42,86 @@ class RDoc::Markup::ToHtml < RDoc::Markup::Formatter
# :section:
+ # Maps an encoding to a Hash of characters properly transcoded for that
+ # encoding. Characters the encoding lacks degrade to an ASCII spelling.
+ #
+ # See also encode_fallback.
+
+ TO_HTML_CHARACTERS = Hash.new do |h, encoding|
+ h[encoding] = {
+ close_dquote: encode_fallback('”', encoding, '"'),
+ close_squote: encode_fallback('’', encoding, '\''),
+ copyright: encode_fallback('©', encoding, '(c)'),
+ ellipsis: encode_fallback('…', encoding, '...'),
+ dot_ellipsis: encode_fallback('.…', encoding, '...'), # '.' is kept, only '…' is replaced: '.' + '...' == '....'
+ em_dash: encode_fallback('—', encoding, '---'),
+ en_dash: encode_fallback('–', encoding, '--'),
+ open_dquote: encode_fallback('“', encoding, '"'),
+ open_squote: encode_fallback('‘', encoding, '\''),
+ trademark: encode_fallback('®', encoding, '(r)'),
+ }
+ end
+
+ HTML_CHARACTER_ALIASES = { # ASCII spelling => key into a TO_HTML_CHARACTERS entry
+ '(c)' => :copyright,
+ '(C)' => :copyright,
+ '(r)' => :trademark,
+ '(R)' => :trademark,
+ '---' => :em_dash, # before '--': init_regexp_handlings unions the keys in this order
+ '--' => :en_dash,
+ '....' => :dot_ellipsis, # before '...' for the same reason
+ '...' => :ellipsis,
+ '``' => :open_dquote,
+ "''" => :close_dquote,
+ }
+
+ # Transcodes +character+ to +encoding+, using +fallback+ where +encoding+ cannot represent it.
+
+ def self.encode_fallback(character, encoding, fallback)
+ character.encode(
+ encoding,
+ fallback: { character => fallback },
+ undef: :replace, # unencodable characters are substituted rather than raising
+ replace: fallback
+ )
+ end
+
+ # Converts ASCII quote pairs to typographic (multibyte) quote characters, tracking open/close state.
+ class QuoteConverter
+
+ def initialize
+ @in_dquote = false # true while inside an unclosed double quote
+ @in_squote = false # true while inside an unclosed single quote
+ end
+
+ def convert(quote, after_word:) # returns the replacement, or nil when +quote+ is left as is
+ case quote
+ when '"'
+ type = @in_dquote ? :close_dquote : :open_dquote # double quotes alternate open/close
+ @in_dquote = !@in_dquote
+ when "'"
+ if @in_squote
+ type = :close_squote
+ @in_squote = false
+ elsif after_word
+ # Mary's dog, my parents' house: do not start paired quotes
+ type = :close_squote
+ else
+ type = :open_squote
+ @in_squote = true
+ end
+ when '`'
+ # Opening quote of `quoted sentence'.
+ # This will conflict with code blocks `puts('hello')` in the future.
+ if !@in_squote && !after_word
+ type = :open_squote
+ @in_squote = true
+ end
+ end
+ TO_HTML_CHARACTERS[quote.encoding][type] if type # type stays nil, e.g. for a backtick that opens nothing
+ end
+ end
+
##
# Creates a new formatter that will output HTML
@@ -55,6 +135,7 @@ def initialize(pipe: false, output_decoration: true)
@in_list_entry = nil
@list = nil
@th = nil
+ @quote_converter = nil
@in_tidylink_label = false
@hard_break = "
\n"
@@ -79,6 +160,11 @@ def init_regexp_handlings
# suppress crossref: \#method \::method \ClassName \method_with_underscores
@markup.add_regexp_handling(/\\(?:[#:A-Z]|[a-z]+_[a-z0-9])/, :SUPPRESSED_CROSSREF)
+ @markup.add_regexp_handling(Regexp.union(HTML_CHARACTER_ALIASES.keys), :HTML_CHARACTERS)
+
+ @markup.add_regexp_handling(/\b['"`]/, :QUOTE_AFTER_WORD)
+ @markup.add_regexp_handling(/\B['"`]/, :QUOTE_NOT_AFTER_WORD)
+
init_link_notation_regexp_handlings
end
@@ -231,12 +317,28 @@ def handle_TIDYLINK(label_part, url)
def handle_inline(text) # :nodoc:
@inline_output = +''
+ @quote_converter = QuoteConverter.new # quote pairing state starts fresh for each inline text
super
out = @inline_output
@inline_output = nil
+ @quote_converter = nil
out
end
+ # Converts (c), (C), (r), (R), --, ---, ..., ...., `` and '' to the matching character in the encoding of +text+.
+ def handle_regexp_HTML_CHARACTERS(text)
+ name = HTML_CHARACTER_ALIASES[text]
+ TO_HTML_CHARACTERS[text.encoding][name] if name
+ end
+
+ def handle_regexp_QUOTE_NOT_AFTER_WORD(text) # quote not preceded by a word character (\B); unconverted quotes go through convert_string
+ @quote_converter.convert(text, after_word: false) || convert_string(text)
+ end
+
+ def handle_regexp_QUOTE_AFTER_WORD(text) # quote directly after a word character (\b); unconverted quotes go through convert_string
+ @quote_converter.convert(text, after_word: true) || convert_string(text)
+ end
+
# Converts suppressed cross-reference +text+ to HTML by removing the leading backslash.
def handle_regexp_SUPPRESSED_CROSSREF(text)
@@ -576,9 +678,6 @@ def parseable?(text)
# Converts +item+ to HTML using RDoc::Text#to_html
def to_html(item)
- # Ideally, we should convert html characters at handle_PLAIN_TEXT or somewhere else,
- # but we need to convert it here for now because to_html_characters converts pair of backticks to ’‘ and pair of double backticks to ”“.
- # Known bugs: `...` in `def f(...); end` and `(c) in `` will be wrongly converted.
- to_html_characters(handle_inline(item))
+ handle_inline(item) # character and quote conversion now happens in the regexp handlers
end
end
diff --git a/lib/rdoc/markup/to_html_snippet.rb b/lib/rdoc/markup/to_html_snippet.rb
index 6687dc59f6..62c54c1559 100644
--- a/lib/rdoc/markup/to_html_snippet.rb
+++ b/lib/rdoc/markup/to_html_snippet.rb
@@ -109,7 +109,7 @@ def accept_verbatim(verbatim)
input = verbatim.text.rstrip
text = truncate(input, @character_limit - @characters)
@characters += input.length
- text << ' ...' unless text == input
+ text << " #{TO_HTML_CHARACTERS[text.encoding][:ellipsis]}" unless text == input
super RDoc::Markup::Verbatim.new text
@@ -262,14 +262,14 @@ def handle_inline(text)
return ['', 0] if limit <= 0
@inline_character_limit = limit
res = super
- res << ' ...' if @inline_character_limit <= 0
+ res << " #{TO_HTML_CHARACTERS[text.encoding][:ellipsis]}" if @inline_character_limit <= 0
@characters += limit - @inline_character_limit
res
end
def to_html(item)
throw :done if @characters >= @character_limit
- to_html_characters(handle_inline(item))
+ handle_inline(item) # to_html_characters is removed from RDoc::Text in this change
end
##
diff --git a/lib/rdoc/text.rb b/lib/rdoc/text.rb
index fc16211df1..f65861868f 100644
--- a/lib/rdoc/text.rb
+++ b/lib/rdoc/text.rb
@@ -29,34 +29,6 @@ module RDoc::Text
MARKUP_FORMAT.default = RDoc::Markup
- ##
- # Maps an encoding to a Hash of characters properly transcoded for that
- # encoding.
- #
- # See also encode_fallback.
-
- TO_HTML_CHARACTERS = Hash.new do |h, encoding|
- h[encoding] = {
- :close_dquote => encode_fallback('”', encoding, '"'),
- :close_squote => encode_fallback('’', encoding, '\''),
- :copyright => encode_fallback('©', encoding, '(c)'),
- :ellipsis => encode_fallback('…', encoding, '...'),
- :em_dash => encode_fallback('—', encoding, '---'),
- :en_dash => encode_fallback('–', encoding, '--'),
- :open_dquote => encode_fallback('“', encoding, '"'),
- :open_squote => encode_fallback('‘', encoding, '\''),
- :trademark => encode_fallback('®', encoding, '(r)'),
- }
- end
-
- ##
- # Transcodes +character+ to +encoding+ with a +fallback+ character.
-
- def self.encode_fallback(character, encoding, fallback)
- character.encode(encoding, :fallback => { character => fallback },
- :undef => :replace, :replace => fallback)
- end
-
##
# Expands tab characters in +text+ to eight spaces
@@ -193,95 +165,6 @@ def strip_stars(text)
text.gsub(/^\s+$/, empty)
end
- def to_html(text)
- to_html_characters(text)
- end
-
- ##
- # Converts ampersand, dashes, ellipsis, quotes, copyright and registered
- # trademark symbols in +text+ to properly encoded characters.
-
- def to_html_characters(text)
- html = (''.encode text.encoding).dup
-
- encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding]
-
- s = StringScanner.new text
- insquotes = false
- indquotes = false
- after_word = nil
-
- until s.eos? do
- case
- when s.scan(/<(tt|code)>.*?<\/\1>/) then # skip contents of tt
- html << s.matched
- when s.scan(/<(tt|code)>.*?/) then
- warn "mismatched <#{s[1]}> tag" # TODO signal file/line
- html << s.matched
- when s.scan(/<[^>]+\/?s*>/) then # skip HTML tags
- html << s.matched
- when s.scan(/\.\.\.(\.?)/) then
- html << s[1] << encoded[:ellipsis]
- after_word = nil
- when s.scan(/\(c\)/i) then
- html << encoded[:copyright]
- after_word = nil
- when s.scan(/\(r\)/i) then
- html << encoded[:trademark]
- after_word = nil
- when s.scan(/---/) then
- html << encoded[:em_dash]
- after_word = nil
- when s.scan(/--/) then
- html << encoded[:en_dash]
- after_word = nil
- when s.scan(/"|"/) then
- html << encoded[indquotes ? :close_dquote : :open_dquote]
- indquotes = !indquotes
- after_word = nil
- when s.scan(/``/) then # backtick double quote
- html << encoded[:open_dquote]
- after_word = nil
- when s.scan(/(?:'|'){2}/) then # tick double quote
- html << encoded[:close_dquote]
- after_word = nil
- when s.scan(/`/) then # backtick
- if insquotes or after_word
- html << '`'
- after_word = false
- else
- html << encoded[:open_squote]
- insquotes = true
- end
- when s.scan(/'|'/) then # single quote
- if insquotes
- html << encoded[:close_squote]
- insquotes = false
- elsif after_word
- # Mary's dog, my parents' house: do not start paired quotes
- html << encoded[:close_squote]
- else
- html << encoded[:open_squote]
- insquotes = true
- end
-
- after_word = nil
- else # advance to the next potentially significant character
- match = s.scan(/.+?(?=[<\\.("'`&-])/) #"
-
- if match then
- html << match
- after_word = match =~ /\w$/
- else
- html << s.rest
- break
- end
- end
- end
-
- html
- end
-
##
# Wraps +txt+ to +line_len+
diff --git a/test/rdoc/markup/to_html_crossref_test.rb b/test/rdoc/markup/to_html_crossref_test.rb
index 24e0c97e86..65f0543753 100644
--- a/test/rdoc/markup/to_html_crossref_test.rb
+++ b/test/rdoc/markup/to_html_crossref_test.rb
@@ -118,7 +118,7 @@ def test_convert_CROSSREF_section_with_spaces
def test_convert_CROSSREF_legacy_label
result = @to.convert 'C1@What-27s+Here'
- assert_equal para("What\u2019s Here at C1"), result
+ assert_equal para("What's Here at C1"), result # NOTE(review): straight apostrophe expected; presumably labels no longer pass through quote conversion, confirm
end
def test_convert_CROSSREF_legacy_label_colon
@@ -130,7 +130,7 @@ def test_convert_CROSSREF_legacy_section
@c1.add_section "What's Here"
result = @to.convert "C1@What-27s+Here"
- assert_equal para("What\u2019s Here at C1"), result
+ assert_equal para("What's Here at C1"), result
end
def test_convert_CROSSREF_constant
diff --git a/test/rdoc/markup/to_html_snippet_test.rb b/test/rdoc/markup/to_html_snippet_test.rb
index b468b57474..28d45bd56b 100644
--- a/test/rdoc/markup/to_html_snippet_test.rb
+++ b/test/rdoc/markup/to_html_snippet_test.rb
@@ -543,7 +543,7 @@ def test_convert_limit_verbatim
Hello There
This is some text, it will be cut off after 100 characters -
This one is cut off in this verbatim ...+
This one is cut off in this verbatim …EXPECTED actual = @to.convert rdoc diff --git a/test/rdoc/markup/to_html_test.rb b/test/rdoc/markup/to_html_test.rb index aa9ef04aa8..d082bf94c2 100644 --- a/test/rdoc/markup/to_html_test.rb +++ b/test/rdoc/markup/to_html_test.rb @@ -718,6 +718,51 @@ def test_convert_string assert_equal '<>', @to.convert_string('<>') end + def test_self_converter_encode_fallback + assert_equal '…', + RDoc::Markup::ToHtml::encode_fallback('…', Encoding::UTF_8, '...') + assert_equal '...', + RDoc::Markup::ToHtml::encode_fallback('…', Encoding::US_ASCII, '...') + end + + def test_convert_HTML_CHARACTER + result = @to.convert "(c)(r)(C)(R)...--....---``''" + assert_equal "\n
©®©®…–.…—“”
\n", result + + result = @to.convert "(c)(r)(C)(R)...--....---``''" + assert_equal "\n(c)(r)(C)(R)...--....---``''
#{expected}
\n", result + end + + def test_convert_QUOTE_dquote + result = @to.convert '"This is a +quoted+ string." and "another"' + assert_equal "\n“This is a quoted string.” and “another”
‘quote’ ‘1+2’. I’m ‘RDoc’
\n", result + end + + def test_convert_QUOTE_backtick + result = @to.convert "This is `quote' and this is `code`" + assert_equal "\nThis is ‘quote’ and this is code