BEGIN { in_pre = 0 } { if ($0 ~ /
/) {
in_pre = 1
}
if (in_pre) {
print
if ($0 ~ /<\/pre>/) {
in_pre = 0
}
next
}
line = $0
# automatic links
while (match(line, /]+>/)) {
start = RSTART; len = RLENGTH
url = substr(line, start + 1, len - 2)
repl = "" url ""
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# automatic email address links
while (match(line, /<[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}>/)) {
start = RSTART; len = RLENGTH
email = substr(line, start + 1, len - 2)
repl = "" email ""
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# force-inline image syntax (double bang)
while (match(line, /!!\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) {
start = RSTART; len = RLENGTH
token = substr(line, start, len)
match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2)
match(token, /"[^"]*"/); title = substr(token, RSTART + 1, RLENGTH - 2)
match(token, /\([^\)]+/); inner = substr(token, RSTART + 1, RLENGTH - 1)
sub(/[[:space:]]*"[^"]*"/, "", inner); src = inner
repl = "
"
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
while (match(line, /!!\[[^\]]*\]\([^\)]+\)/)) {
start = RSTART; len = RLENGTH
token = substr(line, start, len)
match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2)
match(token, /\([^\)]+/); src = substr(token, RSTART + 1, RLENGTH - 1)
repl = "
"
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# inline image
while (match(line, /!\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) {
start = RSTART; len = RLENGTH
if (start > 1 && substr(line, start - 1, 1) == "\\") break
token = substr(line, start, len)
match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2)
match(token, /"[^"]*"/); title = substr(token, RSTART + 1, RLENGTH - 2)
match(token, /\([^\)]+/); inner = substr(token, RSTART + 1, RLENGTH - 1)
sub(/[[:space:]]*"[^"]*"/, "", inner); src = inner
repl = "
"
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
while (match(line, /!\[[^\]]*\]\([^\)]+\)/)) {
start = RSTART; len = RLENGTH
if (start > 1 && substr(line, start - 1, 1) == "\\") break
token = substr(line, start, len)
match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2)
match(token, /\([^\)]+/); src = substr(token, RSTART + 1, RLENGTH - 1)
repl = "
"
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# inline link
while (match(line, /\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) {
start = RSTART; len = RLENGTH
if (start > 1 && (substr(line, start - 1, 1) == "\\" || substr(line, start - 1, 1) == "!")) break
token = substr(line, start, len)
match(token, /\[[^\]]*\]/); text = substr(token, RSTART + 1, RLENGTH - 2)
match(token, /"[^"]*"/); title = substr(token, RSTART + 1, RLENGTH - 2)
match(token, /\([^\)]+/); inner = substr(token, RSTART + 1, RLENGTH - 1)
sub(/[[:space:]]*"[^"]*"/, "", inner); href = inner
repl = "" text ""
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
while (match(line, /\[[^\]]*\]\([^\)]+\)/)) {
start = RSTART; len = RLENGTH
if (start > 1 && (substr(line, start - 1, 1) == "\\" || substr(line, start - 1, 1) == "!")) break
token = substr(line, start, len)
match(token, /\[[^\]]*\]/); text = substr(token, RSTART + 1, RLENGTH - 2)
match(token, /\([^\)]+/); href = substr(token, RSTART + 1, RLENGTH - 1)
repl = "" text ""
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# MFM font syntax
while (match(line, /\$\[font\.serif [^\]]+\]/)) {
start = RSTART; len = RLENGTH
content = substr(line, start + 13, len - 14)
line = substr(line, 1, start - 1) "" content "" substr(line, start + len)
}
while (match(line, /\$\[font\.monospace [^\]]+\]/)) {
start = RSTART; len = RLENGTH
content = substr(line, start + 17, len - 18)
line = substr(line, 1, start - 1) "" content "" substr(line, start + len)
}
while (match(line, /\$\[font\.sans [^\]]+\]/)) {
start = RSTART; len = RLENGTH
content = substr(line, start + 12, len - 13)
line = substr(line, 1, start - 1) "" content "" substr(line, start + len)
}
# Bold, Italic, Strikethrough (BRE-like logic in AWK)
# Strong Bold **
while (match(line, /\*\*[^*]+\*\*/)) {
start = RSTART; len = RLENGTH
content = substr(line, start + 2, len - 4)
repl = "" content ""
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# Strong Bold __
while (match(line, /__[^_]+__/)) {
start = RSTART; len = RLENGTH
content = substr(line, start + 2, len - 4)
repl = "" content ""
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# Italic *
while (match(line, /\*[^*]+\*/)) {
start = RSTART; len = RLENGTH
content = substr(line, start + 1, len - 2)
repl = "" content ""
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# Italic _
while (match(line, /_[^_]+_/)) {
start = RSTART; len = RLENGTH
if (start > 1 && substr(line, start - 1, 1) == "\\") break
content = substr(line, start + 1, len - 2)
repl = "" content ""
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# Strikethrough ~~
while (match(line, /~~[^~]+~~/)) {
start = RSTART; len = RLENGTH
content = substr(line, start + 2, len - 4)
repl = "" content ""
line = substr(line, 1, start - 1) repl substr(line, start + len)
}
# special characters
if (line !~ /&[A-Za-z0-9#]+;/) {
gsub(/&/, "&", line)
}
p = 1
while (match(substr(line, p), /)) {
start = p + RSTART - 1
next_char = substr(line, start + 1, 1)
if (next_char !~ /^[\/A-Za-z]/) {
line = substr(line, 1, start - 1) "<" substr(line, start + 1)
p = start + 4
} else {
p = start + 1
}
}
print line
}