Move all awk functions to separate files

This commit is contained in:
2026-03-07 19:42:37 +01:00
parent 3258616282
commit f580ed9cab
17 changed files with 838 additions and 777 deletions

14
awk/blockquote.awk Normal file
View File

@@ -0,0 +1,14 @@
# Wraps every run of consecutive ">"-prefixed lines in <blockquote> ... </blockquote>
# and strips one quote marker (">" plus at most one whitespace character) from
# each line of the run. All other lines are printed unchanged.
BEGIN { in_bq = 0 }
{
    quoted = ($0 ~ /^>[[:space:]]?/)
    # Emit the wrapper tags only on transitions into / out of a quoted run.
    if (quoted && !in_bq) print "<blockquote>"
    if (!quoted && in_bq) print "</blockquote>"
    in_bq = quoted
    if (quoted) sub(/^>[[:space:]]?/, "")
    print
}
END {
    # Input ended while still inside a quote: close it.
    if (in_bq) print "</blockquote>"
}

View File

@@ -0,0 +1,46 @@
# Returns s with its first character upper-cased and the remainder lower-cased.
function cap(s) {
    if (length(s) < 2) return toupper(s)
    return toupper(substr(s, 1, 1)) tolower(substr(s, 2))
}
# Buffers the whole input, then rewrites every <blockquote> block whose first
# non-empty line is a "[!KIND]" marker (KIND being note, tip, important,
# warning or caution, case-insensitive) into an admonition <div>.
# Everything else is printed unchanged.
BEGIN { count = 0 }
{ lines[++count] = $0 }

# Tries to render the blockquote spanning lines[lo] ("<blockquote>") to
# lines[hi] ("</blockquote>") as an admonition.
# Returns 1 when the admonition was printed, 0 when nothing was printed.
function emit_admonition(lo, hi,    k, marker, marker_idx, lkind, has_body) {
    marker = ""
    marker_idx = 0
    for (k = lo + 1; k < hi; k++) {
        if (lines[k] != "") {
            marker = lines[k]
            marker_idx = k
            break
        }
    }
    if (marker !~ /^\[![A-Za-z]+\]$/) return 0

    # Drop the leading "[!" and the trailing "]".
    lkind = tolower(substr(marker, 3, length(marker) - 3))
    # lkind is letters only, so a space-delimited lookup is an exact match.
    if (index(" note tip important warning caution ", " " lkind " ") == 0) return 0

    print "<div class=\"admonition admonition-" lkind "\">"
    print "<p class=\"admonition-title\">" cap(lkind) "</p>"
    has_body = 0
    for (k = marker_idx + 1; k < hi; k++) {
        if (lines[k] == "") continue
        print "<p>" lines[k] "</p>"
        has_body = 1
    }
    if (!has_body) print "<p></p>"
    print "</div>"
    return 1
}

END {
    i = 1
    while (i <= count) {
        if (lines[i] == "<blockquote>") {
            # Locate the first closing tag after the opener.
            for (j = i + 1; j <= count; j++) {
                if (lines[j] == "</blockquote>") break
            }
            if (j <= count && emit_admonition(i, j)) {
                i = j + 1
                continue
            }
        }
        print lines[i]
        i++
    }
}

5
awk/breaks.awk Normal file
View File

@@ -0,0 +1,5 @@
# Turns the second (and each further) line of a run of consecutive blank lines
# into "<br />"; every other line is printed unchanged.
{
    # NR > 1: on the first record prev is still unset (and compares equal to
    # ""), so without this guard a blank first line would become a <br />.
    if (NR > 1 && $0 == "" && prev == "") {
        print "<br />"
    } else {
        print $0
    }
    prev = $0
}

27
awk/fenced_code.awk Normal file
View File

@@ -0,0 +1,27 @@
# Converts ``` fenced blocks into <pre><code> ... </code></pre>.
# The opening tag is emitted together with the first content line; a single
# blank line directly after the opening fence is skipped. Lines outside a
# fence are printed unchanged.
#
# pending_open tracks whether "<pre><code>" still has to be written. Without
# it, a fence whose first line is blank (or a fence with no content at all)
# produced a closing tag with no matching opening tag.
BEGIN { in_fence = 0; first_line = 0; pending_open = 0 }
{
    if (!in_fence && $0 ~ /^```/) {
        in_fence = 1
        first_line = 1
        pending_open = 1
        next
    }
    if (in_fence && $0 ~ /^```[[:space:]]*$/) {
        # Keep the tags on separate lines: later passes detect the end of a
        # code block on a line following the opening one.
        if (pending_open) print "<pre><code>"
        print "</code></pre>"
        in_fence = 0
        pending_open = 0
        next
    }
    if (!in_fence) {
        print
        next
    }
    if (first_line) {
        first_line = 0
        if ($0 == "") next
    }
    if (pending_open) {
        print "<pre><code>" $0
        pending_open = 0
    } else {
        print
    }
}
END {
    # Unterminated fence: close it, opening it first if nothing was emitted.
    if (in_fence) {
        if (pending_open) print "<pre><code>"
        print "</code></pre>"
    }
}

View File

@@ -4,6 +4,34 @@ function title_from_name(name) {
return name return name
} }
# Orders two "/"-separated paths. Returns -1 when p1 sorts first, 1 when p2
# sorts first, 0 when they are equivalent.
# Components are compared left to right, case-insensitively, with a trailing
# ".md" removed from the last component of each path. At each level:
#   1. a final component named "index" sorts before anything else,
#   2. then the weight from the global custom_order[] (999999 when absent),
#   3. then plain string order.
# If all shared components tie, the path with fewer components sorts first.
# Parameters after p2 are locals.
function compare_paths(p1, p2, parts1, parts2, n1, n2, i, name1, name2, lname1, lname2, w1, w2) {
    n1 = split(p1, parts1, "/")
    n2 = split(p2, parts2, "/")
    for (i = 1; i <= n1 && i <= n2; i++) {
        lname1 = parts1[i]
        lname2 = parts2[i]
        if (i == n1) sub(/\.md$/, "", lname1)
        if (i == n2) sub(/\.md$/, "", lname2)
        lname1 = tolower(lname1)
        lname2 = tolower(lname2)

        # Identical components can never decide the order; go one level down.
        if (lname1 == lname2) continue

        if (i == n1 && lname1 == "index") return -1
        if (i == n2 && lname2 == "index") return 1

        w1 = 999999
        if (lname1 in custom_order) w1 = custom_order[lname1]
        w2 = 999999
        if (lname2 in custom_order) w2 = custom_order[lname2]
        if (w1 < w2) return -1
        if (w1 > w2) return 1

        if (lname1 < lname2) return -1
        if (lname1 > lname2) return 1
    }
    if (n1 < n2) return -1
    if (n1 > n2) return 1
    return 0
}
BEGIN { BEGIN {
n_dlines = split(dinfo, dlines, "\n") n_dlines = split(dinfo, dlines, "\n")
for (i = 1; i <= n_dlines; i++) { for (i = 1; i <= n_dlines; i++) {
@@ -39,33 +67,6 @@ BEGIN {
has_index[dir] = 1 has_index[dir] = 1
} }
} }
function compare_paths(p1, p2, parts1, parts2, n1, n2, i, name1, name2, lname1, lname2, w1, w2) {
n1 = split(p1, parts1, "/")
n2 = split(p2, parts2, "/")
for (i = 1; i <= n1 && i <= n2; i++) {
name1 = parts1[i]
name2 = parts2[i]
if (i == n1) gsub(/\.md$/, "", name1)
if (i == n2) gsub(/\.md$/, "", name2)
lname1 = tolower(name1)
lname2 = tolower(name2)
if (lname1 == "index" && i == n1 && lname2 != "index") return -1
if (lname2 == "index" && i == n2 && lname1 != "index") return 1
w1 = (lname1 in custom_order ? custom_order[lname1] : 999999)
w2 = (lname2 in custom_order ? custom_order[lname2] : 999999)
if (w1 < w2) return -1
if (w1 > w2) return 1
if (lname1 < lname2) return -1
if (lname1 > lname2) return 1
}
if (n1 < n2) return -1
if (n1 > n2) return 1
return 0
}
END { END {
for (i = 0; i < count - 1; i++) { for (i = 0; i < count - 1; i++) {
@@ -78,7 +79,6 @@ END {
} }
} }
print "<ul>" print "<ul>"
if ("index.md" in all_paths) { if ("index.md" in all_paths) {
print "<li><a href=\"/index.html\">Home</a></li>" print "<li><a href=\"/index.html\">Home</a></li>"

65
awk/headers.awk Normal file
View File

@@ -0,0 +1,65 @@
# Reduces a heading's text to a plain string for use as an id attribute:
# removes HTML tags, the characters * _ ` ~ [ ], any parenthesised text, and
# leading/trailing whitespace.
function strip_markdown(s) {
    gsub(/<[^>]+>/, "", s)
    # Emphasis/code markers and link brackets in a single pass.
    gsub(/[*_`~\[\]]/, "", s)
    gsub(/\([^\)]*\)/, "", s)
    sub(/^[[:space:]]+/, "", s)
    sub(/[[:space:]]+$/, "", s)
    return s
}
# Converts Setext ("===" / "---" underlines) and ATX ("# " .. "###### ")
# headings into <h1>..<h6> elements whose id is strip_markdown(<heading text>).
# Lines are processed with one line of look-behind (prev / has_prev) so that a
# Setext underline can still claim the line before it. Content between
# "<pre><code>" and "</code></pre>" is passed through untouched.
#
# Fixes over the previous version:
#   * the last line of the input is converted for every ATX level, not only "# ";
#   * a heading buffered right before a code block is converted, not printed raw;
#   * a blank line buffered before a code block is discarded instead of being
#     kept and printed after the block (which produced a stray blank line).
BEGIN {
    has_prev = 0
    in_pre = 0
}

# Prints one buffered line: "#".."######" followed by a space becomes the
# matching heading element, anything else is printed verbatim.
# Relies on strip_markdown() defined in this file.
function emit_line(line,    level, text) {
    if (match(line, /^#+ /) && RLENGTH <= 7) {
        # Capture both values before calling any other function.
        level = RLENGTH - 1
        text = substr(line, RLENGTH + 1)
        print "<h" level " id=\"" strip_markdown(text) "\">" text "</h" level ">"
        return
    }
    print line
}

{
    if ($0 ~ /^<pre><code>/) {
        in_pre = 1
        # Flush the look-behind buffer; a buffered blank line is dropped.
        if (has_prev) {
            if (prev != "") emit_line(prev)
            has_prev = 0
        }
        print
        next
    }
    if (in_pre) {
        if ($0 ~ /<\/code><\/pre>/) in_pre = 0
        print
        next
    }
    # A Setext underline only applies to a non-empty previous line that is not
    # already an HTML element.
    if ($0 ~ /^=+$/ && has_prev && prev != "" && prev !~ /^<[a-z]/) {
        print "<h1 id=\"" strip_markdown(prev) "\">" prev "</h1>"
        has_prev = 0
    } else if ($0 ~ /^-+$/ && has_prev && prev != "" && prev !~ /^<[a-z]/) {
        print "<h2 id=\"" strip_markdown(prev) "\">" prev "</h2>"
        has_prev = 0
    } else {
        if (has_prev) emit_line(prev)
        prev = $0
        has_prev = 1
    }
}
END {
    if (has_prev) emit_line(prev)
}

9
awk/indented_code.awk Normal file
View File

@@ -0,0 +1,9 @@
# Wraps runs of indented lines in <pre><code> ... </code></pre>, removing one
# level of indentation and HTML-escaping &, < and >.
#
# NOTE(review): the indent is taken to be four spaces or one tab, the standard
# Markdown convention — confirm against the intended width. The previous
# pattern left its second alternative unanchored, so it also matched lines
# that merely contained the indent character somewhere.
#
# Lines that are already inside a <pre> block (e.g. produced by the fenced
# code pass) are passed through untouched; previously their indented lines
# were wrapped in a second, nested <pre><code>.
BEGIN { in_code = 0; in_pre = 0 }
{
    if ($0 ~ /^<pre>/) in_pre = 1
    if (in_pre) {
        if (in_code) { print "</code></pre>"; in_code = 0 }
        print
        if ($0 ~ /<\/pre>/) in_pre = 0
        next
    }
}
/^(    |\t)/ {
    if (!in_code) { print "<pre><code>"; in_code = 1 }
    sub(/^(    |\t)/, "", $0)
    gsub(/&/, "\\&amp;"); gsub(/</, "\\&lt;"); gsub(/>/, "\\&gt;")
    print; next
}
{
    if (in_code) { print "</code></pre>"; in_code = 0 }
    print
}
END { if (in_code) print "</code></pre>" }

66
awk/lists.awk Normal file
View File

@@ -0,0 +1,66 @@
# Converts Markdown list items ("* ", "+ ", "- " or "1. ", optionally
# indented) into <ul>/<ol> with one <li> per item. Nesting follows the
# indentation of the marker: a deeper indent opens a new list, a shallower one
# closes lists until the indent fits. Any non-list line, and any line inside a
# <pre> block, closes all open lists.
BEGIN {
    depth = 0
    in_pre = 0
}

# Closes open lists, innermost first, until only `keep` levels remain.
function close_lists(keep) {
    while (depth > keep) {
        print "</" cur_type[depth] ">"
        depth--
    }
}

{
    if ($0 ~ /^<pre>/) in_pre = 1
    if (in_pre) {
        close_lists(0)
        print
        if ($0 ~ /<\/pre>/) in_pre = 0
        next
    }

    line = $0
    type = ""
    # Detect the marker together with its leading whitespace.
    if (match(line, /^[ \t]*[*+-] /)) {
        type = "ul"
        marker_len = RLENGTH
    } else if (match(line, /^[ \t]*[0-9]+\. /)) {
        type = "ol"
        marker_len = RLENGTH
    }

    if (type == "") {
        close_lists(0)
        print line
        next
    }

    content = substr(line, marker_len + 1)
    # Indentation level = number of leading blanks/tabs.
    match(line, /^[ \t]*/)
    indent = RLENGTH

    if (depth == 0 || indent > cur_indent[depth]) {
        depth++
        cur_indent[depth] = indent
        cur_type[depth] = type
        print "<" type ">"
    } else {
        # Dedent: close deeper lists, but never the outermost one here.
        while (depth > 1 && indent < cur_indent[depth]) {
            print "</" cur_type[depth] ">"
            depth--
        }
        # Same level but different marker kind: swap the list element.
        if (type != cur_type[depth]) {
            print "</" cur_type[depth] ">"
            cur_type[depth] = type
            print "<" type ">"
        }
    }
    print "<li>" content "</li>"
}
END {
    close_lists(0)
}

View File

@@ -119,11 +119,35 @@ function css_highlight_line(line, m, prop, val) {
return "<span class=\"tok-punc\">}</span>" return "<span class=\"tok-punc\">}</span>"
} }
if (match(line, /^([[:space:]]*)(--?[A-Za-z0-9_-]+)([[:space:]]*:[[:space:]]*)([^;]*)(;?[[:space:]]*)$/, m)) { if (line ~ /^[[:space:]]*--?[A-Za-z0-9_-]+[[:space:]]*:[[:space:]]*[^;]*;?[[:space:]]*$/) {
prop = "<span class=\"tok-prop\">" m[2] "</span>" match(line, /:[[:space:]]*/)
gsub(/var\(--[A-Za-z0-9_-]+\)/, "<span class=\"tok-var\">&</span>", m[4]) sep_pos = RSTART
val = "<span class=\"tok-val\">" m[4] "</span>" sep_len = RLENGTH
return m[1] prop m[3] val m[5]
pre_sep = substr(line, 1, sep_pos - 1)
sep = substr(line, sep_pos, sep_len)
post_sep = substr(line, sep_pos + sep_len)
match(pre_sep, /--?[A-Za-z0-9_-]+/)
prop_pos = RSTART
prop_len = RLENGTH
indent = substr(pre_sep, 1, prop_pos - 1)
prop_name = substr(pre_sep, prop_pos, prop_len)
if (match(post_sep, /;[[:space:]]*$/)) {
val_part = substr(post_sep, 1, RSTART - 1)
suffix = substr(post_sep, RSTART)
} else {
val_part = post_sep
suffix = ""
}
prop = "<span class=\"tok-prop\">" prop_name "</span>"
gsub(/var\(--[A-Za-z0-9_-]+\)/, "<span class=\"tok-var\">&</span>", val_part)
val = "<span class=\"tok-val\">" val_part "</span>"
return indent prop sep val suffix
} }
return line return line

173
awk/markdown_inline.awk Normal file
View File

@@ -0,0 +1,173 @@
# Inline Markdown pass. For every line outside <pre> ... </pre> it converts, in
# this order: <url> and <email> autolinks, "!!" force-inline images, images,
# links, MFM $[font.*] spans, **bold** / __bold__, *italic* / _italic_,
# ~~strikethrough~~, and finally escapes stray "&" and "<".
BEGIN {
    in_pre = 0
}

# Returns s with the n characters starting at position `at` replaced by repl.
function splice(s, at, n, repl) {
    return substr(s, 1, at - 1) repl substr(s, at + n)
}

# Replaces the span (at, n) of s by open_tag + interior + close_tag, where the
# interior is the span without its first `lead` and last `trail` characters.
function rewrap(s, at, n, lead, trail, open_tag, close_tag) {
    return splice(s, at, n, open_tag substr(s, at + lead, n - lead - trail) close_tag)
}

# Text between the first "[" ... "]" pair of tok.
function bracket_text(tok) {
    match(tok, /\[[^\]]*\]/)
    return substr(tok, RSTART + 1, RLENGTH - 2)
}

# Text between the first pair of double quotes of tok.
function quoted_text(tok) {
    match(tok, /"[^"]*"/)
    return substr(tok, RSTART + 1, RLENGTH - 2)
}

# Everything after the first "(" of tok up to (not including) the next ")".
function paren_target(tok) {
    match(tok, /\([^\)]+/)
    return substr(tok, RSTART + 1, RLENGTH - 1)
}

# paren_target() with the first quoted title (and the whitespace before it) removed.
function target_sans_title(tok,    t) {
    t = paren_target(tok)
    sub(/[[:space:]]*"[^"]*"/, "", t)
    return t
}

# True when the character right before position `at` in s is one of `chars`.
function preceded_by(s, at, chars) {
    return (at > 1 && index(chars, substr(s, at - 1, 1)) > 0)
}

{
    if ($0 ~ /<pre>/) {
        in_pre = 1
    }
    if (in_pre) {
        print
        if ($0 ~ /<\/pre>/) {
            in_pre = 0
        }
        next
    }

    line = $0

    # automatic links
    while (match(line, /<https?:\/\/[^>]+>/)) {
        at = RSTART; n = RLENGTH
        url = substr(line, at + 1, n - 2)
        line = splice(line, at, n, "<a href=\"" url "\">" url "</a>")
    }

    # automatic email address links
    while (match(line, /<[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}>/)) {
        at = RSTART; n = RLENGTH
        email = substr(line, at + 1, n - 2)
        line = splice(line, at, n, "<a href=\"mailto:" email "\">" email "</a>")
    }

    # force-inline image syntax (double bang), with then without title
    while (match(line, /!!\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) {
        at = RSTART; n = RLENGTH
        tok = substr(line, at, n)
        line = splice(line, at, n, "<img data-force-inline=\"1\" alt=\"" bracket_text(tok) "\" src=\"" target_sans_title(tok) "\" title=\"" quoted_text(tok) "\" />")
    }
    while (match(line, /!!\[[^\]]*\]\([^\)]+\)/)) {
        at = RSTART; n = RLENGTH
        tok = substr(line, at, n)
        line = splice(line, at, n, "<img data-force-inline=\"1\" alt=\"" bracket_text(tok) "\" src=\"" paren_target(tok) "\" />")
    }

    # inline image, with then without title; a preceding backslash stops the pass
    while (match(line, /!\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) {
        at = RSTART; n = RLENGTH
        if (preceded_by(line, at, "\\")) break
        tok = substr(line, at, n)
        line = splice(line, at, n, "<img alt=\"" bracket_text(tok) "\" src=\"" target_sans_title(tok) "\" title=\"" quoted_text(tok) "\" />")
    }
    while (match(line, /!\[[^\]]*\]\([^\)]+\)/)) {
        at = RSTART; n = RLENGTH
        if (preceded_by(line, at, "\\")) break
        tok = substr(line, at, n)
        line = splice(line, at, n, "<img alt=\"" bracket_text(tok) "\" src=\"" paren_target(tok) "\" />")
    }

    # inline link, with then without title; a preceding "\" or "!" stops the pass
    while (match(line, /\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) {
        at = RSTART; n = RLENGTH
        if (preceded_by(line, at, "\\!")) break
        tok = substr(line, at, n)
        line = splice(line, at, n, "<a href=\"" target_sans_title(tok) "\" title=\"" quoted_text(tok) "\">" bracket_text(tok) "</a>")
    }
    while (match(line, /\[[^\]]*\]\([^\)]+\)/)) {
        at = RSTART; n = RLENGTH
        if (preceded_by(line, at, "\\!")) break
        tok = substr(line, at, n)
        line = splice(line, at, n, "<a href=\"" paren_target(tok) "\">" bracket_text(tok) "</a>")
    }

    # MFM font syntax: lead = length of "$[font.<name> ", trail = the closing "]"
    while (match(line, /\$\[font\.serif [^\]]+\]/)) {
        line = rewrap(line, RSTART, RLENGTH, 13, 1, "<span style=\"font-family: serif;\">", "</span>")
    }
    while (match(line, /\$\[font\.monospace [^\]]+\]/)) {
        line = rewrap(line, RSTART, RLENGTH, 17, 1, "<span style=\"font-family: monospace;\">", "</span>")
    }
    while (match(line, /\$\[font\.sans [^\]]+\]/)) {
        line = rewrap(line, RSTART, RLENGTH, 12, 1, "<span style=\"font-family: sans-serif;\">", "</span>")
    }

    # Bold, Italic, Strikethrough
    # Strong Bold **
    while (match(line, /\*\*[^*]+\*\*/)) {
        line = rewrap(line, RSTART, RLENGTH, 2, 2, "<strong>", "</strong>")
    }
    # Strong Bold __
    while (match(line, /__[^_]+__/)) {
        line = rewrap(line, RSTART, RLENGTH, 2, 2, "<strong>", "</strong>")
    }
    # Italic *
    while (match(line, /\*[^*]+\*/)) {
        line = rewrap(line, RSTART, RLENGTH, 1, 1, "<em>", "</em>")
    }
    # Italic _ (a preceding backslash stops the pass)
    while (match(line, /_[^_]+_/)) {
        at = RSTART; n = RLENGTH
        if (preceded_by(line, at, "\\")) break
        line = rewrap(line, at, n, 1, 1, "<em>", "</em>")
    }
    # Strikethrough ~~
    while (match(line, /~~[^~]+~~/)) {
        line = rewrap(line, RSTART, RLENGTH, 2, 2, "<strike>", "</strike>")
    }

    # special characters: "&" is escaped only when the line has no entity at all
    if (line !~ /&[A-Za-z0-9#]+;/) {
        gsub(/&/, "\\&amp;", line)
    }
    # Escape every "<" that does not start a tag ("<" followed by "/" or a letter).
    scan = 1
    while ((off = index(substr(line, scan), "<")) > 0) {
        at = scan + off - 1
        if (substr(line, at + 1, 1) !~ /^[\/A-Za-z]/) {
            line = splice(line, at, 1, "&lt;")
            scan = at + 4
        } else {
            scan = at + 1
        }
    }

    print line
}

85
awk/mask_inline_code.awk Normal file
View File

@@ -0,0 +1,85 @@
# Returns s with every Markdown-significant character replaced by a
# "\034P<n>\034" placeholder so later passes leave it alone. <n> is the
# zero-based position of the character in `specials`:
#   * _ ` [ ] ( ) ! $ # + - \ < >   ->   P0 .. P14
# Parameters after s are locals.
function mask(s, t,    specials, len, pos, ch, slot) {
    specials = "*_`[]()!$#+-\\<>"
    t = ""
    len = length(s)
    for (pos = 1; pos <= len; pos++) {
        ch = substr(s, pos, 1)
        slot = index(specials, ch)
        if (slot > 0) {
            t = t "\034P" (slot - 1) "\034"
        } else {
            t = t ch
        }
    }
    return t
}
# Per-line pass that
#   1. replaces backslash-escaped punctuation with "\034P<n>\034" placeholders,
#   2. turns `code` and ``code`` spans into <code> ... </code>, masking the
#      span's content with mask() so later passes do not touch it.
# Lines that start a fence (```) are printed after step 1 only.
{
    # backslash escapes
    # An escaped backslash has to be consumed first. Otherwise "\\*" (literal
    # backslash followed by a live "*") is misread as "\" + escaped "*".
    gsub(/\\\\/, "\034P12\034")
    gsub(/\\\*/, "\034P0\034")
    gsub(/\\_/, "\034P1\034")
    gsub(/\\`/, "\034P2\034")
    gsub(/\\\[/, "\034P3\034")
    gsub(/\\\]/, "\034P4\034")
    gsub(/\\\(/, "\034P5\034")
    gsub(/\\\)/, "\034P6\034")
    gsub(/\\!/, "\034P7\034")
    gsub(/\\\$/, "\034P8\034")
    gsub(/\\#/, "\034P9\034")
    gsub(/\\\+/, "\034P10\034")
    gsub(/\\\-/, "\034P11\034")
    gsub(/\\</, "\034P13\034")
    gsub(/\\>/, "\034P14\034")

    # inline code (1 or 2 backticks)
    line = $0
    if (line ~ /^```/) {
        print line
        next
    }
    out = ""
    p = 1    # first character of line not yet copied to out
    while (match(substr(line, p), /`+/)) {
        pstart = p + RSTART - 1
        plen = RLENGTH
        if (plen >= 3) {
            # Runs of three or more backticks are copied through literally.
            out = out substr(line, p, pstart - p + plen)
            p = pstart + plen
            continue
        }
        # Found 1 or 2 backticks at pstart; search for the closing marker.
        marker = substr(line, pstart, plen)
        tail = substr(line, pstart + plen)
        mpos = index(tail, marker)
        if (mpos > 0) {
            # A closing marker that is followed by another backtick belongs to
            # a longer run: not a match, treat the opener as literal.
            if (substr(tail, mpos + plen, 1) == "`") {
                out = out substr(line, p, pstart - p + plen)
                p = pstart + plen
                continue
            }
            content = substr(tail, 1, mpos - 1)
            out = out substr(line, p, pstart - p)
            # `` code `` : drop one padding space on each side.
            if (plen == 2 && substr(content, 1, 1) == " " && substr(content, length(content), 1) == " ") {
                content = substr(content, 2, length(content) - 2)
            }
            out = out "<code>" mask(content) "</code>"
            # Continue right after the closing marker.
            p = pstart + plen + mpos + plen - 1
        } else {
            # No closing marker, treat as literal.
            out = out substr(line, p, pstart - p + plen)
            p = pstart + plen
        }
    }
    out = out substr(line, p)
    print out
}

52
awk/mask_plain.awk Normal file
View File

@@ -0,0 +1,52 @@
# Returns the 1-based position of the first occurrence of tag in s that is not
# immediately preceded by a backslash, or 0 when there is none.
# p, off and pos are locals.
function find_unescaped_tag(s, tag, p, off, pos) {
    for (p = 1; (off = index(substr(s, p), tag)) > 0; p = pos + 1) {
        pos = p + off - 1
        if (pos == 1) return pos
        if (substr(s, pos - 1, 1) != "\\") return pos
        # Escaped occurrence: resume the search one character further on.
    }
    return 0
}
# Returns s with each of the characters * _ ` [ ] ( ) ! $ replaced by the
# placeholder "\034P<n>\034", <n> being the zero-based position of the
# character in `specials` (P0 .. P8). Parameters after s are locals.
function mask_plain(s, t,    specials, len, pos, ch, slot) {
    specials = "*_`[]()!$"
    t = ""
    len = length(s)
    for (pos = 1; pos <= len; pos++) {
        ch = substr(s, pos, 1)
        slot = index(specials, ch)
        if (slot > 0) {
            t = t "\034P" (slot - 1) "\034"
        } else {
            t = t ch
        }
    }
    return t
}
# Rewrites <plain> ... </plain> regions: the tags become <mfmplain> /
# </mfmplain> and the text in between is passed through mask_plain(). Tags
# preceded by a backslash are ignored. in_plain carries over from line to
# line, so a region may span several lines.
BEGIN { in_plain = 0 }
{
    rest = $0
    acc = ""
    for (;;) {
        # The tag to look for depends on which side of a region we are on.
        tag = (in_plain ? "</plain>" : "<plain>")
        at = find_unescaped_tag(rest, tag)
        if (at == 0) {
            # No further tag on this line: flush what is left.
            acc = acc (in_plain ? mask_plain(rest) : rest)
            break
        }
        head = substr(rest, 1, at - 1)
        if (in_plain) {
            acc = acc mask_plain(head) "</mfmplain>"
        } else {
            acc = acc head "<mfmplain>"
        }
        rest = substr(rest, at + length(tag))
        in_plain = !in_plain
    }
    print acc
}

43
awk/paragraphs.awk Normal file
View File

@@ -0,0 +1,43 @@
# Wraps runs of plain text lines in <p> ... </p>. A paragraph ends at a blank
# line, at a line that starts with a block-level tag, or at a <pre> block
# (whose content is passed through untouched).
#
# The block-level tag list includes the table parts (thead, tbody, tfoot, tr,
# th, td). The table pass prints each of these on its own line; without them
# in the list the rows of every table were wrapped in a <p> placed inside
# <table>.
BEGIN {
    in_p = 0
    in_pre = 0
}
{
    if ($0 ~ /^<pre>/) in_pre = 1
    if (in_pre) {
        if (in_p) { print "</p>"; in_p = 0 }
        print
        if ($0 ~ /<\/pre>/) in_pre = 0
        next
    }
    # Block-level element (opening or closing tag): never part of a paragraph.
    if ($0 ~ /^<\/?(div|table|thead|tbody|tfoot|tr|th|td|p|[ou]l|h[1-6]|[bh]r|blockquote|li)/) {
        if (in_p) {
            print "</p>"
            in_p = 0
        }
        print
        next
    }
    # Blank line: paragraph separator, kept in the output.
    if ($0 == "") {
        if (in_p) {
            print "</p>"
            in_p = 0
        }
        print
        next
    }
    if (!in_p) {
        print "<p>"
        in_p = 1
    }
    print
}
END {
    if (in_p) print "</p>"
}

143
awk/pipe_tables.awk Normal file
View File

@@ -0,0 +1,143 @@
# Returns s without leading and trailing whitespace.
function trim(s) {
    gsub(/^[[:space:]]+|[[:space:]]+$/, "", s)
    return s
}
# Returns 1 when line both starts and ends with "|" (surrounding whitespace
# ignored), 0 otherwise. t is an unused local kept for the signature.
function is_table_row(line, t) {
    if (line !~ /^[[:space:]]*\|/) return 0
    return (line ~ /\|[[:space:]]*$/)
}
# Returns 1 when line is a table row made up only of "|", ":", "-" and
# whitespace and containing at least one "-"; 0 otherwise. t is a local.
function is_table_sep(line, t) {
    if (!is_table_row(line) || line !~ /-/) return 0
    t = line
    gsub(/[|:\-[:space:]]/, "", t)
    return (t == "")
}
# Splits a table row into trimmed cells. The leading and trailing "|" are
# removed first; the cells are stored in out[1..n] and n is returned.
# n, i and raw are locals.
function split_row(line, out, n, i, raw) {
    raw = line
    sub(/^[[:space:]]*\|/, "", raw)
    sub(/\|[[:space:]]*$/, "", raw)
    n = split(raw, out, /\|/)
    i = 0
    while (++i <= n) {
        out[i] = trim(out[i])
    }
    return n
}
# Maps one separator cell to a text-align value: ":---:" -> "center",
# ":---" -> "left", "---:" -> "right", anything else -> "". t is a local.
function align_for(sep, t) {
    t = trim(sep)
    if (t ~ /^:-+:$/) {
        return "center"
    } else if (t ~ /^:-+$/) {
        return "left"
    } else if (t ~ /^-+:$/) {
        return "right"
    }
    return ""
}
# Returns the trimmed cell. A cell that starts and ends with ``` is rendered
# as <pre><code> ... </code></pre> with the fences (and the whitespace next to
# them) removed. inner is a local.
function render_cell(cell, inner) {
    inner = trim(cell)
    if (inner !~ /^```.*```$/) return inner
    sub(/^```[[:space:]]*/, "", inner)
    sub(/[[:space:]]*```$/, "", inner)
    return "<pre><code>" inner "</code></pre>"
}
# Buffers the whole input, then converts pipe tables to HTML:
#   * a row followed by a separator row -> table with that row as header;
#   * a separator row followed by a row -> table with empty header cells.
# Column alignment comes from the separator row. Lines between "<pre><code>"
# and "</code></pre>" are printed untouched, as is everything else.
BEGIN { count = 0 }
{ lines[++count] = $0 }

# Prints one <th>/<td> element, with a text-align style when align is set.
function emit_cell(tag, align, content) {
    if (align != "") {
        print "<" tag " style=\"text-align: " align ";\">" content "</" tag ">"
    } else {
        print "<" tag ">" content "</" tag ">"
    }
}

# Prints <tbody> with one <tr> per consecutive table row starting at
# lines[from], then closes the table. Uses the globals n_cols, n_sep and sep[]
# set by the caller; returns the index of the first line after the table.
function emit_body(from,    r, col) {
    print "<tbody>"
    for (r = from; r <= count && is_table_row(lines[r]); r++) {
        n_body = split_row(lines[r], body)
        print "<tr>"
        for (col = 1; col <= n_cols; col++) {
            emit_cell("td", (col <= n_sep ? align_for(sep[col]) : ""), (col <= n_body ? render_cell(body[col]) : ""))
        }
        print "</tr>"
    }
    print "</tbody>"
    print "</table>"
    return r
}

END {
    in_pre = 0
    i = 1
    while (i <= count) {
        cur = lines[i]
        if (cur ~ /^<pre><code>/) {
            in_pre = 1
            print cur
            i++
            continue
        }
        if (in_pre) {
            print cur
            if (cur ~ /^<\/code><\/pre>/) in_pre = 0
            i++
            continue
        }

        # Table with a header row: header line, then separator line.
        if (i < count && is_table_row(cur) && is_table_sep(lines[i + 1])) {
            n_header = split_row(cur, header)
            n_sep = split_row(lines[i + 1], sep)
            n_cols = (n_header > n_sep ? n_header : n_sep)
            print "<table>"
            print "<thead>"
            print "<tr>"
            for (c = 1; c <= n_cols; c++) {
                emit_cell("th", (c <= n_sep ? align_for(sep[c]) : ""), (c <= n_header ? render_cell(header[c]) : ""))
            }
            print "</tr>"
            print "</thead>"
            i = emit_body(i + 2)
            continue
        }

        # Headerless table: separator line first; it is also emitted as the
        # first body row because the body starts at the separator's successor
        # only after the empty header has been printed.
        if (is_table_sep(cur) && i < count && is_table_row(lines[i + 1])) {
            n_sep = split_row(cur, sep)
            n_cols = n_sep
            print "<table>"
            print "<thead>"
            print "<tr>"
            for (c = 1; c <= n_cols; c++) {
                emit_cell("th", align_for(sep[c]), "")
            }
            print "</tr>"
            print "</thead>"
            i = emit_body(i + 1)
            continue
        }

        print cur
        i++
    }
}

10
awk/update_site_conf.awk Normal file
View File

@@ -0,0 +1,10 @@
# Sets the "title" entry of a site.conf stream to the value of the awk
# variable new_title: every existing "title =" line is replaced, and when
# there is none the entry is appended at the end. Other lines are unchanged.
function title_line() {
    return "title = \"" new_title "\""
}
BEGIN { done = 0 }
{
    if ($0 ~ /^title[[:space:]]*=/) {
        print title_line()
        done = 1
    } else {
        print
    }
}
END {
    if (!done) print title_line()
}

23
kewt.sh
View File

@@ -82,18 +82,7 @@ create_new_site() {
printf "# _kewt_ website\n" > "$new_dir/index.md" printf "# _kewt_ website\n" > "$new_dir/index.md"
if [ -n "$new_title" ]; then if [ -n "$new_title" ]; then
awk -v new_title="$new_title" ' awk -v new_title="$new_title" -f "$awk_dir/update_site_conf.awk" "$new_dir/site.conf" > "$new_dir/site.conf.tmp" && mv "$new_dir/site.conf.tmp" "$new_dir/site.conf"
BEGIN { done = 0 }
/^title[[:space:]]*=/ {
print "title = \"" new_title "\""
done = 1
next
}
{ print }
END {
if (!done) print "title = \"" new_title "\""
}
' "$new_dir/site.conf" > "$new_dir/site.conf.tmp" && mv "$new_dir/site.conf.tmp" "$new_dir/site.conf"
fi fi
echo "Created new site at '$new_dir'." echo "Created new site at '$new_dir'."
@@ -101,7 +90,7 @@ create_new_site() {
} }
generate_nav() { generate_nav() {
dinfo=$(find "$1" -not -path '*/.*' | sort | awk -v src="$1" -f "$awk_dir/collect_dir_info.awk") dinfo=$(find "$1" ! -path '*/.*' | sort | awk -v src="$1" -f "$awk_dir/collect_dir_info.awk")
find "$1" -name "*.md" | sort | awk -v src="$1" -v single_file_index="$single_file_index" -v flatten="$flatten" -v order="$order" -v dinfo="$dinfo" -f "$awk_dir/generate_sidebar.awk" find "$1" -name "*.md" | sort | awk -v src="$1" -v single_file_index="$single_file_index" -v flatten="$flatten" -v order="$order" -v dinfo="$dinfo" -f "$awk_dir/generate_sidebar.awk"
} }
@@ -314,9 +303,9 @@ find "$src" -type d | sort | while read -r dir; do
if [ ! -f "$dir/index.md" ]; then if [ ! -f "$dir/index.md" ]; then
if [ "$single_file_index" = "true" ]; then if [ "$single_file_index" = "true" ]; then
md_count=$(find "$dir" -maxdepth 1 -name "*.md" | wc -l) md_count=$(find "$dir" ! -name "$(basename "$dir")" -prune -name "*.md" | wc -l)
if [ "$md_count" -eq 1 ]; then if [ "$md_count" -eq 1 ]; then
md_file=$(find "$dir" -maxdepth 1 -name "*.md") md_file=$(find "$dir" ! -name "$(basename "$dir")" -prune -name "*.md")
render_markdown "$md_file" > "$out_dir/index.html" render_markdown "$md_file" > "$out_dir/index.html"
continue continue
fi fi
@@ -327,7 +316,7 @@ find "$src" -type d | sort | while read -r dir; do
[ -z "$display_dir" ] && display_dir="/" [ -z "$display_dir" ] && display_dir="/"
echo "# Index of $display_dir" > "$temp_index" echo "# Index of $display_dir" > "$temp_index"
echo "" >> "$temp_index" echo "" >> "$temp_index"
find "$dir" -maxdepth 1 -not -path '*/.*' -not -path "$dir" | sort | while read -r entry; do find "$dir" ! -name "$(basename "$dir")" -prune ! -path '*/.*' | sort | while read -r entry; do
name="${entry##*/}" name="${entry##*/}"
case "$name" in case "$name" in
template.html|site.conf|style.css|index.md) continue ;; template.html|site.conf|style.css|index.md) continue ;;
@@ -359,7 +348,7 @@ find "$src" -type f | sort | while IFS= read -r file; do
esac esac
if [ "$single_file_index" = "true" ] && [ "${file%.md}" != "$file" ] && [ ! -f "$(dirname "$file")/index.md" ]; then if [ "$single_file_index" = "true" ] && [ "${file%.md}" != "$file" ] && [ ! -f "$(dirname "$file")/index.md" ]; then
md_count=$(find "$(dirname "$file")" -maxdepth 1 -name "*.md" | wc -l) md_count=$(find "$(dirname "$file")" ! -name "$(basename "$(dirname "$file")")" -prune -name "*.md" | wc -l)
[ "$md_count" -eq 1 ] && continue [ "$md_count" -eq 1 ] && continue
fi fi

View File

@@ -3,34 +3,6 @@
script_dir=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) script_dir=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
awk_dir="$script_dir/awk" awk_dir="$script_dir/awk"
sed_ere() {
if sed -E '' </dev/null >/dev/null 2>&1; then
sed -E "$@"
else
sed -r "$@"
fi
}
sed_ere_inplace() {
script="$1"
file="$2"
tmp="${file}.tmp.$$"
sed_ere "$script" "$file" > "$tmp" && mv "$tmp" "$file" || {
rm -f "$tmp"
return 1
}
}
sed_ere_inplace_n() {
script="$1"
file="$2"
tmp="${file}.tmp.$$"
sed_ere -n "$script" "$file" > "$tmp" && mv "$tmp" "$file" || {
rm -f "$tmp"
return 1
}
}
sed_inplace() { sed_inplace() {
script="$1" script="$1"
file="$2" file="$2"
@@ -44,711 +16,49 @@ sed_inplace() {
temp_file="/tmp/markdown.$$" temp_file="/tmp/markdown.$$"
cat "$@" > "$temp_file" cat "$@" > "$temp_file"
# backslash escapes for literal characters and inline code masking # Mask
awk ' awk -f "$awk_dir/mask_inline_code.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
function mask(s, t) { awk -f "$awk_dir/mask_plain.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
t = s
gsub(/\*/, "\034P0\034", t)
gsub(/_/, "\034P1\034", t)
gsub(/`/, "\034P2\034", t)
gsub(/\[/, "\034P3\034", t)
gsub(/\]/, "\034P4\034", t)
gsub(/\(/, "\034P5\034", t)
gsub(/\)/, "\034P6\034", t)
gsub(/!/, "\034P7\034", t)
gsub(/\$/, "\034P8\034", t)
gsub(/#/, "\034P9\034", t)
gsub(/\+/, "\034P10\034", t)
gsub(/-/, "\034P11\034", t)
gsub(/\\/, "\034P12\034", t)
gsub(/</, "\034P13\034", t)
gsub(/>/, "\034P14\034", t)
return t
}
{
# backslash escapes
gsub(/\\\*/, "\034P0\034")
gsub(/\\_/, "\034P1\034")
gsub(/\\`/, "\034P2\034")
gsub(/\\\[/, "\034P3\034")
gsub(/\\\]/, "\034P4\034")
gsub(/\\\(/, "\034P5\034")
gsub(/\\\)/, "\034P6\034")
gsub(/\\!/, "\034P7\034")
gsub(/\\\$/, "\034P8\034")
gsub(/\\#/, "\034P9\034")
gsub(/\\\+/, "\034P10\034")
gsub(/\\\-/, "\034P11\034")
gsub(/\\\\/, "\034P12\034")
gsub(/\\</, "\034P13\034")
gsub(/\\>/, "\034P14\034")
# inline code (1 or 2 backticks)
line = $0
if (line ~ /^```/) {
print line
next
}
out = ""
p = 1
while (match(substr(line, p), /`+/)) {
pstart = p + RSTART - 1
plen = RLENGTH
if (plen >= 3) {
out = out substr(line, p, pstart - p + plen)
p = pstart + plen
continue
}
# Found 1 or 2 backticks at pstart
# Search for closing marker
marker = substr(line, pstart, plen)
tail = substr(line, pstart + plen)
mpos = index(tail, marker)
if (mpos > 0) {
# Check if it is followed by more backticks
if (substr(tail, mpos + plen, 1) == "`") {
# Not a match, treat as literal
out = out substr(line, p, pstart - p + plen)
p = pstart + plen
continue
}
# Found match!
content = substr(tail, 1, mpos - 1)
out = out substr(line, p, pstart - p)
if (plen == 2 && substr(content, 1, 1) == " " && substr(content, length(content), 1) == " ") {
content = substr(content, 2, length(content) - 2)
}
out = out "<code>" mask(content) "</code>"
p = pstart + plen + mpos + plen - 1
} else {
# No closing marker, treat as literal
out = out substr(line, p, pstart - p + plen)
p = pstart + plen
}
}
out = out substr(line, p)
print out
}' "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
awk '
function find_unescaped_tag(s, tag, p, off, pos) {
p = 1
while (1) {
off = index(substr(s, p), tag)
if (off == 0) return 0
pos = p + off - 1
if (pos == 1 || substr(s, pos - 1, 1) != "\\") return pos
p = pos + 1
}
}
function mask_plain(s, t) {
t = s
gsub(/\*/, "\034P0\034", t)
gsub(/_/, "\034P1\034", t)
gsub(/`/, "\034P2\034", t)
gsub(/\[/, "\034P3\034", t)
gsub(/\]/, "\034P4\034", t)
gsub(/\(/, "\034P5\034", t)
gsub(/\)/, "\034P6\034", t)
gsub(/!/, "\034P7\034", t)
gsub(/\$/, "\034P8\034", t)
return t
}
BEGIN { in_plain = 0 }
{
line = $0
out = ""
while (1) {
if (!in_plain) {
pos = find_unescaped_tag(line, "<plain>")
if (pos == 0) {
out = out line
break
}
out = out substr(line, 1, pos - 1) "<mfmplain>"
line = substr(line, pos + 7)
in_plain = 1
} else {
pos = find_unescaped_tag(line, "</plain>")
if (pos == 0) {
out = out mask_plain(line)
line = ""
break
}
out = out mask_plain(substr(line, 1, pos - 1)) "</mfmplain>"
line = substr(line, pos + 8)
in_plain = 0
}
}
print out
}
' "$temp_file" > "$temp_file.plain.$$" && mv "$temp_file.plain.$$" "$temp_file"
# Reference links
refs=$(cat "$@" | awk '/^\[[^\]]+\]: +/')
IFS=' IFS='
' '
refs=$(sed_ere -n "/^\[.+\]: +/p" "$@") for ref in $refs; do
for ref in $refs ref_id=$(echo "$ref" | sed 's/^\[\(.*\)\]: .*/\1/')
do ref_url=$(echo "$ref" | sed 's/^\[.*\]: \([^ ]*\).*/\1/')
ref_id=$(printf %s "$ref" | sed_ere -n "s/^\[(.+)\]: .*/\1/p" | tr -d '\n') ref_title=$(echo "$ref" | sed -n 's/^\[.*\]: [^ ]* "\(.*\)"/\1/p' | sed 's@|@!@g')
ref_url=$(printf %s "$ref" | sed_ere -n "s/^\[.+\]: (.+)/\1/p" | cut -d' ' -f1 | tr -d '\n') sed_inplace "s|!\[\([^]]*\)\]\[$ref_id\]|<img src=\"$ref_url\" title=\"$ref_title\" alt=\"\1\" />|g" "$temp_file"
ref_title=$(printf %s "$ref" | sed_ere -n "s/^\[.+\]: (.+) \"(.+)\"/\2/p" | sed 's@|@!@g' | tr -d '\n') sed_inplace "s|\[\([^]]*\)\]\[$ref_id\]|<a href=\"$ref_url\" title=\"$ref_title\">\1</a>|g" "$temp_file"
sed_inplace "s|!\[$ref_id\]\[\]|<img src=\"$ref_url\" title=\"$ref_title\" alt=\"$ref_id\" />|g" "$temp_file"
sed_inplace "s|\[$ref_id\]\[\]|<a href=\"$ref_url\" title=\"$ref_title\">$ref_id</a>|g" "$temp_file"
done
sed_inplace "/^\[[^\]]*\]: +/d" "$temp_file"
# reference-style image using the label # Blocks
sed_ere_inplace "s|!\[([^]]+)\]\[($ref_id)\]|<img src=\"$ref_url\" title=\"$ref_title\" alt=\"\1\" />|g" "$temp_file" sed_inplace "s/^>!\[/> [!/g" "$temp_file"
# reference-style link using the label sed_inplace "s/^>\[!/> [!/g" "$temp_file"
sed_ere_inplace "s|\[([^]]+)\]\[($ref_id)\]|<a href=\"$ref_url\" title=\"$ref_title\">\1</a>|g" "$temp_file"
# implicit reference-style while grep '^>' "$temp_file" >/dev/null; do
sed_ere_inplace "s|!\[($ref_id)\]\[\]|<img src=\"$ref_url\" title=\"$ref_title\" alt=\"\1\" />|g" "$temp_file" awk -f "$awk_dir/blockquote.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
# implicit reference-style
sed_ere_inplace "s|\[($ref_id)\]\[\]|<a href=\"$ref_url\" title=\"$ref_title\">\1</a>|g" "$temp_file"
done done
# delete the reference lines awk -f "$awk_dir/blockquote_to_admonition.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
sed_ere_inplace "/^\[.+\]: +/d" "$temp_file" awk -f "$awk_dir/fenced_code.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
awk -f "$awk_dir/indented_code.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
# normalize GitHub admonition shorthand in blockquotes awk -f "$awk_dir/pipe_tables.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
sed_ere_inplace ' awk -f "$awk_dir/headers.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
/^>!\[/s/^>!\[/> [!/ awk -f "$awk_dir/lists.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
/^>\[!/s/^>\[!/> [!/
s/^>([^[:space:]>])/> \1/ sed_inplace "s/^\*\*\*+$/<hr \/>/g" "$temp_file"
' "$temp_file" sed_inplace "s/^---+$/<hr \/>/g" "$temp_file"
sed_inplace "s/^___+$/<hr \/>/g" "$temp_file"
# blockquotes
# use grep to find all the nested blockquotes # Spacing
while grep '^> ' "$temp_file" >/dev/null awk -f "$awk_dir/breaks.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
do awk -f "$awk_dir/paragraphs.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
sed_ere_inplace_n '
/^$/b blockquote # Inline styles
awk -f "$awk_dir/markdown_inline.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file"
H
$ b blockquote
b
:blockquote
x
s/(\n+)(> .*)/\1<blockquote>\n\2\n<\/blockquote>/ # wrap the tags in a blockquote
p
' "$temp_file"
sed_inplace '1 d' "$temp_file" # cleanup superfluous first line
# cleanup blank lines and remove subsequent blockquote characters
sed_ere_inplace '
/^> /s/^> (.*)/\1/
' "$temp_file"
done
# convert [!TYPE] blockquotes into admonition blocks
# The whole file is buffered, then every <blockquote> ... </blockquote> pair
# whose first non-empty line is a recognised [!TYPE] marker is replaced by a
# <div class="admonition ..."> block; everything else is echoed unchanged.
awk '
# Upper-case the first character of a word and lower-case the rest.
function titlecase(word) { return toupper(substr(word, 1, 1)) tolower(substr(word, 2)) }
# Index of the first closing blockquote tag after start, or 0 when there is none.
function closing_index(start, total,    p) {
    for (p = start + 1; p <= total; p++)
        if (buf[p] == "</blockquote>") return p
    return 0
}
# Lower-cased admonition name for a [!TYPE] marker line, or "" when the line
# is not a marker or names an unsupported type.
function admonition_kind(marker,    name) {
    if (marker !~ /^\[![A-Za-z]+\]$/) return ""
    name = tolower(substr(marker, 3, length(marker) - 3))
    if (name ~ /^(note|tip|important|warning|caution)$/) return name
    return ""
}
{ buf[NR] = $0 }
END {
    for (pos = 1; pos <= NR; pos++) {
        if (buf[pos] == "<blockquote>" && (stop = closing_index(pos, NR)) > 0) {
            # locate the first non-empty line inside the blockquote
            marker_at = 0
            for (q = pos + 1; q < stop; q++)
                if (buf[q] != "") { marker_at = q; break }
            kind = (marker_at ? admonition_kind(buf[marker_at]) : "")
            if (kind != "") {
                print "<div class=\"admonition admonition-" kind "\">"
                print "<p class=\"admonition-title\">" titlecase(kind) "</p>"
                emitted = 0
                for (q = marker_at + 1; q < stop; q++) {
                    if (buf[q] == "") continue
                    print "<p>" buf[q] "</p>"
                    emitted++
                }
                # keep the block well-formed when the admonition has no body
                if (emitted == 0) print "<p></p>"
                print "</div>"
                # resume after the closing tag (the loop increment adds one)
                pos = stop
                continue
            }
        }
        print buf[pos]
    }
}
' "$temp_file" > "$temp_file.admon.$$" && mv "$temp_file.admon.$$" "$temp_file"
# Setext-style headers
# Lines are collected in the sed hold space until a blank line (or the last
# line) is reached; the collected group is then swapped into the pattern space.
# A group whose last line consists only of "=" has the text before it wrapped
# in <h1>; one whose last line consists only of "-" gets <h2>. Any other group
# is printed unchanged. Every printed group starts with a newline, so the
# first output line is empty and is removed by the "1 d" step afterwards.
sed_ere_inplace_n '
# Setext-style headers need to be wrapped around newlines
/^$/ b print
# else, append to holding area
H
$ b print
b
:print
x
/=+$/{
s/\n(.*)\n=+$/\n<h1>\1<\/h1>/
p
b
}
/\-+$/{
s/\n(.*)\n\-+$/\n<h2>\1<\/h2>/
p
b
}
p
' "$temp_file"
sed_inplace '1 d' "$temp_file" # cleanup superfluous first line
# atx-style headers and other block styles
# First strips a trailing run of "#" (preceded by a space) from header lines,
# then rewrites lines starting with one to six "#" plus a space into
# <h1>..<h6>. The leading run of letters, digits and spaces of the title is
# copied verbatim into the id attribute. Lines made only of three or more
# "*", "-" or "_" characters become <hr />.
sed_ere_inplace '
/^#+ /s/ #+$// # kill all ending header characters
/^# /s/# ([A-Za-z0-9 ]*)(.*)/<h1 id="\1">\1\2<\/h1>/g # H1
/^#{2} /s/#{2} ([A-Za-z0-9 ]*)(.*)/<h2 id="\1">\1\2<\/h2>/g # H2
/^#{3} /s/#{3} ([A-Za-z0-9 ]*)(.*)/<h3 id="\1">\1\2<\/h3>/g # H3
/^#{4} /s/#{4} ([A-Za-z0-9 ]*)(.*)/<h4 id="\1">\1\2<\/h4>/g # H4
/^#{5} /s/#{5} ([A-Za-z0-9 ]*)(.*)/<h5 id="\1">\1\2<\/h5>/g # H5
/^#{6} /s/#{6} ([A-Za-z0-9 ]*)(.*)/<h6 id="\1">\1\2<\/h6>/g # H6
/^\*\*\*+$/s/\*\*\*+/<hr \/>/ # hr with *
/^---+$/s/---+/<hr \/>/ # hr with -
/^___+$/s/___+/<hr \/>/ # hr with _
' "$temp_file"
# unordered lists
# use grep to find all the nested lists
# Each pass handles one nesting level: lines starting with a list marker and a
# space are tagged with placeholder <uli> elements, every blank-line-delimited
# group containing such placeholders is wrapped in <ul>, and the placeholders
# are then renamed to <li>. The loop repeats while any line still starts with
# a list marker.
# NOTE(review): inside a bracket expression a backslash is normally literal,
# so [\*\+\-] presumably also accepts a leading "\" as a marker - confirm.
while grep '^[\*\+\-] ' "$temp_file" >/dev/null
do
sed_ere_inplace_n '
# wrap the list
/^$/b list
# wrap the li tags then add to the hold buffer
# use uli instead of li to avoid collisions when processing nested lists
/^[\*\+\-] /s/[\*\+\-] (.*)/<\/uli>\n<uli>\n\1/
H
$ b list # if at end of file, check for the end of a list
b # else, branch to the end of the script
# this is where a list is checked for the pattern
:list
# exchange the hold space into the pattern space
x
# look for the list items, if there wrap the ul tags
/<uli>/{
s/(.*)/\n<ul>\1\n<\/uli>\n<\/ul>/ # close the ul tags
s/\n<\/uli>// # kill the first superfluous closing tag
p
b
}
p
' "$temp_file"
sed_inplace '1 d' "$temp_file" # cleanup superfluous first line
# convert to the proper li to avoid collisions with nested lists
sed_inplace 's/uli>/li>/g' "$temp_file"
# prepare any nested lists
sed_ere_inplace '/^[\*\+\-] /s/(.*)/\n\1\n/' "$temp_file"
done
# ordered lists
# use grep to find all the nested lists
# Each pass handles one nesting level: lines starting with "<number>. " are
# tagged with placeholder <oli> elements, every blank-line-delimited group
# containing such placeholders is wrapped in <ol>, and the placeholders are
# then renamed to <li>. The loop repeats while any line still starts with a
# numbered marker.
# The number is matched with [0-9]+ rather than [1-9]+ so that markers
# containing a zero ("10.", "20.", "100.") are recognised as list items.
while grep -E '^[0-9]+\. ' "$temp_file" >/dev/null
do
sed_ere_inplace_n '
# wrap the list
/^$/b list
# wrap the li tags then add to the hold buffer
# use oli instead of li to avoid collisions when processing nested lists
/^[0-9]+\. /s/[0-9]+\. (.*)/<\/oli>\n<oli>\n\1/
H
$ b list # if at end of file, check for the end of a list
b # else, branch to the end of the script
:list
# exchange the hold space into the pattern space
x
# look for the list items, if there wrap the ol tags
/<oli>/{
s/(.*)/\n<ol>\1\n<\/oli>\n<\/ol>/ # close the ol tags
s/\n<\/oli>// # kill the first superfluous closing tag
p
b
}
p
' "$temp_file"
sed_inplace '1 d' "$temp_file" # cleanup superfluous first line
# convert list items into proper list items to avoid collisions with nested lists
sed_inplace 's/oli>/li>/g' "$temp_file"
# prepare any nested lists
sed_ere_inplace '/^[0-9]+\. /s/(.*)/\n\1\n/' "$temp_file"
done
# make escaped periods literal
# ("1986\. A great year" keeps its number and is not turned into a list item;
# any decimal number is accepted, including ones containing a zero)
sed_ere_inplace '/^[0-9]+\\. /s/([0-9]+)\\. /\1\. /' "$temp_file"
# fenced code blocks (triple backticks)
# A line starting with three backticks opens a <pre><code> block (anything
# after the backticks on that line is dropped); a line holding only three
# backticks and optional whitespace closes it. An unterminated fence is
# closed at end of input.
awk '
# opening fence: emit the tag without a newline so the first code line
# continues on the same output line
!inside && /^```/ {
    printf "<pre><code>"
    inside = 1
    at_start = 1
    next
}
# closing fence
inside && /^```[[:space:]]*$/ {
    print "</code></pre>"
    inside = 0
    next
}
# drop a single empty line directly after the opening fence
inside && at_start {
    at_start = 0
    if ($0 == "") next
}
# every other line, inside or outside a fence, is copied through
{ print }
END {
    if (inside) print "</code></pre>"
}
' "$temp_file" > "$temp_file.fence.$$" && mv "$temp_file.fence.$$" "$temp_file"
# code blocks
# Consecutive lines that BEGIN with a tab or four spaces are collected in the
# hold space; when a line without that prefix (or the last line) is reached,
# the collected run is wrapped in <pre><code> ... </code></pre>.
# The alternation is parenthesised so that the "^" anchor applies to both the
# tab and the four-space form: the unparenthesised "^\t| {4}" treated any line
# with four consecutive spaces anywhere as code and could insert the opening
# tag in the middle of an ordinary line.
sed_ere_inplace_n '
# if at end of file, append the current line to the hold buffer and print it
${
H
b code
}
# wrap the code block on any non code block lines
/^(\t| {4})/!b code
# else, append to the holding buffer and do nothing
H
b # else, branch to the end of the script
:code
# exchange the hold space with the pattern space
x
# look for a buffered line that starts with the code prefix, if there wrap the pre-code tags
/\n(\t| {4})/{
s/\n(\t| {4})(.*)/\n<pre><code>\n\1\2\n<\/code><\/pre>/ # wrap the ending tags
p
b
}
p
' "$temp_file"
sed_inplace '1 d' "$temp_file" # cleanup superfluous first line
# convert html characters inside pre-code tags into printable representations
# Starting at a line that begins with <pre><code>, every following line has
# "&", "<" and ">" replaced by the matching entity until a line beginning
# with </code></pre> is read. The opening line itself is not modified.
sed_ere_inplace '
# get inside pre-code tags
/^<pre><code>/{
:inside
n
# if you found the end tags, branch out
/^<\/code><\/pre>/!{
s/&/\&amp;/g # ampersand
s/</\&lt;/g # less than
s/>/\&gt;/g # greater than
b inside
}
}
' "$temp_file"
# remove the first tab (or 4 spaces) from the code lines
# The alternation is grouped so both forms are anchored at the start of the
# line; the ungrouped "^\t| {4}(.*)" deleted the first run of four spaces
# found anywhere in a line, not only a leading one.
sed_ere_inplace 's/^(\t| {4})//' "$temp_file"
# markdown pipe tables
# The awk program buffers the whole file and then walks it line by line:
#   * lines from one starting with <pre><code> up to one starting with
#     </code></pre> are copied through untouched;
#   * a row line (starts and ends with "|", ignoring surrounding whitespace)
#     followed by a separator line (a row made only of "|", ":", "-" and
#     whitespace, with at least one "-") starts a table with a header;
#   * a separator line directly followed by a row line starts a table whose
#     header cells are empty;
#   * all following row lines become <tbody> rows.
# Column alignment comes from the colons in the separator cells (":-:" center,
# ":-" left, "-:" right) and is emitted as an inline text-align style. The
# column count is the larger of the header and separator cell counts (the
# separator count for header-less tables); missing body cells are rendered
# empty. A cell that is entirely wrapped in triple backticks is rendered as
# <pre><code>. The result is written to a scratch file that then replaces
# $temp_file.
awk '
function trim(s) {
sub(/^[[:space:]]+/, "", s)
sub(/[[:space:]]+$/, "", s)
return s
}
function is_table_row(line, t) {
t = line
return (t ~ /^[[:space:]]*\|/ && t ~ /\|[[:space:]]*$/)
}
function is_table_sep(line, t) {
if (!is_table_row(line)) return 0
t = line
gsub(/[|:\-[:space:]]/, "", t)
return (t == "" && line ~ /-/)
}
function split_row(line, out, n, i, raw) {
raw = line
sub(/^[[:space:]]*\|/, "", raw)
sub(/\|[[:space:]]*$/, "", raw)
n = split(raw, out, /\|/)
for (i = 1; i <= n; i++) out[i] = trim(out[i])
return n
}
function align_for(sep, t) {
t = trim(sep)
if (t ~ /^:-+:$/) return "center"
if (t ~ /^:-+$/) return "left"
if (t ~ /^-+:$/) return "right"
return ""
}
function render_cell(cell, inner) {
inner = trim(cell)
if (inner ~ /^```.*```$/) {
sub(/^```[[:space:]]*/, "", inner)
sub(/[[:space:]]*```$/, "", inner)
return "<pre><code>" inner "</code></pre>"
}
return inner
}
BEGIN { count = 0 }
{ lines[++count] = $0 }
END {
in_pre = 0
i = 1
while (i <= count) {
if (lines[i] ~ /^<pre><code>/) {
in_pre = 1
print lines[i]
i++
continue
}
if (in_pre) {
print lines[i]
if (lines[i] ~ /^<\/code><\/pre>/) in_pre = 0
i++
continue
}
if (i < count && is_table_row(lines[i]) && is_table_sep(lines[i + 1])) {
n_header = split_row(lines[i], header)
n_sep = split_row(lines[i + 1], sep)
n_cols = (n_header > n_sep ? n_header : n_sep)
print "<table>"
print "<thead>"
print "<tr>"
for (c = 1; c <= n_cols; c++) {
cell = (c <= n_header ? render_cell(header[c]) : "")
a = (c <= n_sep ? align_for(sep[c]) : "")
if (a != "") print "<th style=\"text-align: " a ";\">" cell "</th>"
else print "<th>" cell "</th>"
}
print "</tr>"
print "</thead>"
j = i + 2
print "<tbody>"
while (j <= count && is_table_row(lines[j])) {
n_body = split_row(lines[j], body)
print "<tr>"
for (c = 1; c <= n_cols; c++) {
cell = (c <= n_body ? render_cell(body[c]) : "")
a = (c <= n_sep ? align_for(sep[c]) : "")
if (a != "") print "<td style=\"text-align: " a ";\">" cell "</td>"
else print "<td>" cell "</td>"
}
print "</tr>"
j++
}
print "</tbody>"
print "</table>"
i = j
continue
}
if (is_table_sep(lines[i]) && i < count && is_table_row(lines[i + 1])) {
n_sep = split_row(lines[i], sep)
n_cols = n_sep
print "<table>"
print "<thead>"
print "<tr>"
for (c = 1; c <= n_cols; c++) {
a = align_for(sep[c])
if (a != "") print "<th style=\"text-align: " a ";\"></th>"
else print "<th></th>"
}
print "</tr>"
print "</thead>"
j = i + 1
print "<tbody>"
while (j <= count && is_table_row(lines[j])) {
n_body = split_row(lines[j], body)
print "<tr>"
for (c = 1; c <= n_cols; c++) {
cell = (c <= n_body ? render_cell(body[c]) : "")
a = align_for(sep[c])
if (a != "") print "<td style=\"text-align: " a ";\">" cell "</td>"
else print "<td>" cell "</td>"
}
print "</tr>"
j++
}
print "</tbody>"
print "</table>"
i = j
continue
}
print lines[i]
i++
}
}
' "$temp_file" > "$temp_file.table.$$" && mv "$temp_file.table.$$" "$temp_file"
# br tags
# On an empty line the next two lines are appended to the pattern space. If
# the pattern space then starts with two newlines (the empty line was
# followed by another empty line), a newline plus "<br />" is prepended to
# the three-line group.
sed_ere_inplace '
# if an empty line, append it to the next line, then check on whether there is two in a row
/^$/ {
N
N
/^\n{2}/s/(.*)/\n<br \/>\1/
}
' "$temp_file"
# emphasis and strong emphasis and strikethrough
# Lines are batched in the hold space up to the next blank line (or the end
# of the file) and the inline markers are rewritten on the whole batch; a
# marker pair never spans a newline.
# The ** and ~~ patterns stop at the first closing delimiter instead of the
# last one on the line, so "**a** and **b**" yields two <strong> elements
# rather than one element swallowing the text in between. A single "*" (or
# "~") is still allowed inside the span, which keeps "**a *b* c**" and
# "***both***" working.
sed_ere_inplace_n '
# batch up the entire stream of text until a line break in the action
/^$/b emphasis
H
$ b emphasis
b
:emphasis
x
s/\*\*(([^*\n]|\*[^*\n])+\*?)\*\*/<strong>\1<\/strong>/g
s/__([^_\n]+)__/<strong>\1<\/strong>/g
s/\*([^\*\n]+)\*/<em>\1<\/em>/g
s/([^\\])_([^_\n]+)_/\1<em>\2<\/em>/g
s/\~\~(([^~\n]|\~[^~\n])+)\~\~/<strike>\1<\/strike>/g
p
' "$temp_file"
sed_inplace '1 d' "$temp_file" # cleanup superfluous first line
# paragraphs
# Lines are gathered in the hold space until a blank line or the end of the
# file. A gathered group is wrapped in <p> ... </p> unless one of its lines
# begins with "<" followed by one of the block-level tag names listed in the
# pattern below, in which case it is printed unchanged.
sed_ere_inplace_n '
# if an empty line, check the paragraph
/^$/ b para
# else append it to the hold buffer
H
# at end of file, check paragraph
$ b para
# now branch to end of script
b
# this is where a paragraph is checked for the pattern
:para
# return the entire paragraph into the pattern space
x
# look for non block-level elements, if there - print the p tags
/\n<(div|table|pre|p|[ou]l|h[1-6]|[bh]r|blockquote|li)/!{
s/(\n+)(.*)/\1<p>\n\2\n<\/p>/
p
b
}
p
' "$temp_file"
sed_inplace '1 d' "$temp_file" # cleanup superfluous first line
# cleanup area where P tags have broken nesting
# When a line starts with one of the listed closing block tags, it is saved
# and the next line is read. If that next line starts with </p>, the two are
# printed in swapped order (</p> first, then the saved closing tag);
# otherwise both are printed in their original order. All other lines are
# printed as they are.
sed_ere_inplace_n '
# if the line looks like like an end tag
/^<\/(div|table|pre|p|[ou]l|h[1-6]|[bh]r|blockquote)>/{
h
# if EOF, print the line
$ {
x
b done
}
# fetch the next line and check on whether or not it is a P tag
n
/^<\/p>/{
G
b done
}
# else, append the line to the previous line and print them both
H
x
}
:done
p
' "$temp_file"
# inline styles and special characters
# Skips everything between a line starting with <pre><code> and one starting
# with </code></pre>, then rewrites, per line: automatic links, images
# (including the double-bang force-inline form), inline links, the MFM
# $[font.* ...] spans, and finally stray "&" and "<" characters.
# The automatic link patterns only accept text without "<", ">" or
# whitespace between the angle brackets (and a single "@" for e-mail), so
# they stop at the first ">" and no longer swallow several tags on one line
# or turn ordinary HTML tags surrounding an address into a mailto link.
sed_ere_inplace '
/^<pre><code>/,/^<\/code><\/pre>/b
s/<(https?:\/\/[^<>[:space:]]+)>/<a href=\"\1\">\1<\/a>/g # automatic links
s/<([^<>@[:space:]]+@[^<>@[:space:]]+\.[^<>@[:space:]]+)>/<a href=\"mailto:\1\">\1<\/a>/g # automatic email address links
# force-inline image syntax (double bang)
s/!!\[([^]]*)\]\(([^)]*) \"([^\"]*)\"\)/<img data-force-inline=\"1\" alt=\"\1\" src=\"\2\" title=\"\3\" \/>/g
s/!!\[([^]]*)\]\(([^)]*)\)/<img data-force-inline=\"1\" alt=\"\1\" src=\"\2\" \/>/g
s/(^|[^\\])!\[([^]]*)\]\(([^)]*) \"([^\"]*)\"\)/\1<img alt=\"\2\" src=\"\3\" title=\"\4\" \/>/g # inline image with title
s/(^|[^\\])!\[([^]]*)\]\(([^)]*)\)/\1<img alt=\"\2\" src=\"\3\" \/>/g # inline image without title
s/(^|[^\\!])\[([^]]*)\]\(([^)]*) \"([^\"]*)\"\)/\1<a href=\"\3\" title=\"\4\">\2<\/a>/g # inline link with title
s/(^|[^\\!])\[([^]]*)\]\(([^)]*)\)/\1<a href=\"\3\">\2<\/a>/g # inline link
# MFM font syntax
s/\$\[font\.serif ([^]]+)\]/<span style=\"font-family: serif;\">\1<\/span>/g
s/\$\[font\.monospace ([^]]+)\]/<span style=\"font-family: monospace;\">\1<\/span>/g
s/\$\[font\.sans ([^]]+)\]/<span style=\"font-family: sans-serif;\">\1<\/span>/g
# special characters
/&.+;/!s/&/\&amp;/g # ampersand
/<[\/a-zA-Z]/!s/</\&lt;/g # less than bracket
' "$temp_file"
# display and cleanup
# Run markdown_embed.awk over the processed file, writing the result to
# standard output, then delete the scratch file. Each command appears exactly
# once: repeating it on the same line would hand awk the words "awk", "-v",
# ... as extra input operands and make rm try to delete a file named "rm".
awk -v input_file="$1" -v site_root="$MARKDOWN_SITE_ROOT" -v fallback_file="$MARKDOWN_FALLBACK_FILE" -f "$awk_dir/markdown_embed.awk" "$temp_file"
rm "$temp_file"