Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions internal/magic/text.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,72 @@ var (
)
// Rtf matches a Rich Text Format file.
Rtf = prefix([]byte("{\\rtf"))

// Markdown matches a Markdown document
// Markdown matches a Markdown document. Detection looks for multiple instances
// of standard Markdown syntax patterns to avoid false positives with plain text.
Markdown = func(raw []byte, limit uint32) bool {
// Skip if empty
if len(raw) == 0 {
return false
}

// Check first 1024 bytes for Markdown patterns
checkLen := 1024
if len(raw) < checkLen {
checkLen = len(raw)
}
content := raw[:checkLen]

// Lines that definitively indicate Markdown syntax when found at start or after newline
mdPatterns := [][]byte{
[]byte("# "), // ATX heading
[]byte("## "), // Level 2 heading
[]byte("### "), // Level 3 heading
[]byte("- "), // Unordered list
[]byte("* "), // Alternate unordered list
[]byte("1. "), // Ordered list
[]byte("> "), // Blockquote
[]byte("```"), // Code fence
}

// Special patterns that can appear anywhere
mdInlinePatterns := [][]byte{
[]byte("]("), // Link definition
}

// Count how many Markdown patterns we find
matches := 0

// Check patterns that must be at start or after newline
for _, pattern := range mdPatterns {
// Check at start of content
if bytes.HasPrefix(content, pattern) {
matches++
} else {
// Check after newlines
p := append([]byte{'\n'}, pattern...)
if bytes.Contains(content, p) {
matches++
}
}
if matches >= 2 {
return true
}
}

// Check inline patterns
for _, pattern := range mdInlinePatterns {
if bytes.Contains(content, pattern) {
matches++
}
if matches >= 2 {
return true
}
}

return false
}
)

// Text matches a plain text file.
Expand Down
3 changes: 3 additions & 0 deletions mimetype_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ var testcases = []testcase{
{"audio mp4 MSNV", "\x00\x00\x00\x18ftypMSNV", "audio/mp4", false},
{"audio mp4 NDAS", "\x00\x00\x00\x18ftypNDAS", "audio/mp4", false},
{"lnk", "\x4C\x00\x00\x00\x01\x14\x02\x00", "application/x-ms-shortcut", true},
{"markdown full", "# Heading\n\nParagraph with [link](http://example.com)\n\n* List item\n", "text/markdown", true},
{"markdown code", "# Title\n\n```go\nfunc main() {\n fmt.Println(\"Hello\")\n}\n```\n", "text/markdown", true},
{"markdown it is not", "This is just [some] plain text with * asterisks * and # hashes\nBut no actual markdown.", "text/plain; charset=utf-8", false},
{"mdb", offset(4, "Standard Jet DB"), "application/x-msaccess", true},
{"midi", "\x4D\x54\x68\x64", "audio/midi", true},
{"mkv", "\x1a\x45\xdf\xa3\x01\x00\x00\x00\x00\x00\x00\x23\x42\x86\x81\x01\x42\xf7\x81\x01\x42\xf2\x81\x04\x42\xf3\x81\x08\x42\x82\x88\x6d\x61\x74\x72\x6f\x73\x6b\x61", "video/x-matroska", true},
Expand Down
3 changes: 2 additions & 1 deletion supported_mimes.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## 178 Supported MIME types
## 179 Supported MIME types
This file is automatically generated when running tests. Do not edit manually.

Extension | MIME type | Aliases
Expand Down Expand Up @@ -181,3 +181,4 @@ Extension | MIME type | Aliases
**.ics** | text/calendar | -
**.warc** | application/warc | -
**.vtt** | text/vtt | -
**.md** | text/markdown | -
25 changes: 13 additions & 12 deletions tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,20 +80,21 @@ var (
alias("application/x-ogg")
oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio)
oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo)
text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt)
text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt, markdown)
xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2).
alias("application/xml")
json = newMIME("application/json", ".json", magic.JSON, geoJSON, har)
har = newMIME("application/json", ".har", magic.HAR)
csv = newMIME("text/csv", ".csv", magic.Csv)
tsv = newMIME("text/tab-separated-values", ".tsv", magic.Tsv)
geoJSON = newMIME("application/geo+json", ".geojson", magic.GeoJSON)
ndJSON = newMIME("application/x-ndjson", ".ndjson", magic.NdJSON)
html = newMIME("text/html", ".html", magic.HTML)
php = newMIME("text/x-php", ".php", magic.Php)
rtf = newMIME("text/rtf", ".rtf", magic.Rtf).alias("application/rtf")
js = newMIME("text/javascript", ".js", magic.Js).
alias("application/x-javascript", "application/javascript")
json = newMIME("application/json", ".json", magic.JSON, geoJSON, har)
markdown = newMIME("text/markdown", ".md", magic.Markdown)
har = newMIME("application/json", ".har", magic.HAR)
csv = newMIME("text/csv", ".csv", magic.Csv)
tsv = newMIME("text/tab-separated-values", ".tsv", magic.Tsv)
geoJSON = newMIME("application/geo+json", ".geojson", magic.GeoJSON)
ndJSON = newMIME("application/x-ndjson", ".ndjson", magic.NdJSON)
html = newMIME("text/html", ".html", magic.HTML)
php = newMIME("text/x-php", ".php", magic.Php)
rtf = newMIME("text/rtf", ".rtf", magic.Rtf).alias("application/rtf")
js = newMIME("text/javascript", ".js", magic.Js).
alias("application/x-javascript", "application/javascript")
srt = newMIME("application/x-subrip", ".srt", magic.Srt).
alias("application/x-srt", "text/x-srt")
vtt = newMIME("text/vtt", ".vtt", magic.Vtt)
Expand Down