# Inline-level Markdown -> HTML pass.
#
# For every input line outside a <pre>...</pre> region this script converts
# automatic links, images, inline links, MFM font spans and emphasis markers
# into HTML, then escapes stray "&" and "<" characters. Lines from an opening
# <pre> up to and including the line holding </pre> are printed untouched.
#
# NOTE(review): the HTML literals emitted below (<a>, <img>, <span>, <strong>,
# <em>, <del>) and the automatic-link regex were reconstructed from context
# after the markup had been stripped from this file; confirm the exact
# attribute layout against the canonical copy of the script.

BEGIN {
    in_pre = 0
}

# Replace every "<...>" run in s with an opaque placeholder token.
#
# Side effects: resets the global html_tag_count and fills the global arrays
# html_tag_token[] / html_tag_value[] (1-based) with each placeholder and the
# tag text it stands for. Returns s with the tags swapped for placeholders.
# The placeholders keep the emphasis rules from rewriting "_" or "*" that
# appear inside tag text (for example inside an href).
function mask_html_tags(s,    out, rest, start, len, tag) {
    out = ""
    rest = s
    html_tag_count = 0
    while (match(rest, /<[^>]+>/)) {
        start = RSTART
        len = RLENGTH
        tag = substr(rest, start, len)
        html_tag_count++
        html_tag_token[html_tag_count] = "\034HT" html_tag_count "\034"
        html_tag_value[html_tag_count] = tag
        out = out substr(rest, 1, start - 1) html_tag_token[html_tag_count]
        rest = substr(rest, start + len)
    }
    return out rest
}

# Put back the tags recorded by the most recent mask_html_tags() call.
#
# The tag text is spliced in with index()/substr() rather than gsub(): in a
# gsub() replacement string "&" means "the matched text" and "\" is an escape,
# so a tag such as <a href="?a=1&b=2"> would be corrupted on the way back.
function restore_html_tags(s,    i, out, pos, tok) {
    for (i = 1; i <= html_tag_count; i++) {
        tok = html_tag_token[i]
        out = ""
        # Consume s left to right so restored text is never rescanned.
        while ((pos = index(s, tok)) > 0) {
            out = out substr(s, 1, pos - 1) html_tag_value[i]
            s = substr(s, pos + length(tok))
        }
        s = out s
    }
    return s
}

{
    # Pass preformatted regions through verbatim.
    if ($0 ~ /<pre>/) {
        in_pre = 1
    }
    if (in_pre) {
        print
        if ($0 ~ /<\/pre>/) {
            in_pre = 0
        }
        next
    }

    line = $0

    # automatic links
    # NOTE(review): scheme pattern reconstructed; confirm which schemes apply.
    while (match(line, /<https?:\/\/[^>]+>/)) {
        start = RSTART; len = RLENGTH
        url = substr(line, start + 1, len - 2)
        repl = "<a href=\"" url "\">" url "</a>"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }

    # automatic email address links
    while (match(line, /<[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}>/)) {
        start = RSTART; len = RLENGTH
        email = substr(line, start + 1, len - 2)
        repl = "<a href=\"mailto:" email "\">" email "</a>"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }

    # force-inline image syntax (double bang), titled form first so the
    # untitled pattern below does not swallow the quoted title into src.
    # NOTE(review): any markup distinguishing "!!" from "!" images was lost;
    # both currently emit the same <img> tag.
    while (match(line, /!!\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) {
        start = RSTART; len = RLENGTH
        token = substr(line, start, len)
        match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2)
        match(token, /"[^"]*"/); title = substr(token, RSTART + 1, RLENGTH - 2)
        match(token, /\([^\)]+/); inner = substr(token, RSTART + 1, RLENGTH - 1)
        sub(/[[:space:]]*"[^"]*"/, "", inner); src = inner
        repl = "<img src=\"" src "\" alt=\"" alt "\" title=\"" title "\">"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }
    while (match(line, /!!\[[^\]]*\]\([^\)]+\)/)) {
        start = RSTART; len = RLENGTH
        token = substr(line, start, len)
        match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2)
        match(token, /\([^\)]+/); src = substr(token, RSTART + 1, RLENGTH - 1)
        repl = "<img src=\"" src "\" alt=\"" alt "\">"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }

    # inline image
    # NOTE(review): "break" on a backslash-escaped match also stops the loop
    # for any later, unescaped image on the same line.
    while (match(line, /!\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) {
        start = RSTART; len = RLENGTH
        if (start > 1 && substr(line, start - 1, 1) == "\\") break
        token = substr(line, start, len)
        match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2)
        match(token, /"[^"]*"/); title = substr(token, RSTART + 1, RLENGTH - 2)
        match(token, /\([^\)]+/); inner = substr(token, RSTART + 1, RLENGTH - 1)
        sub(/[[:space:]]*"[^"]*"/, "", inner); src = inner
        repl = "<img src=\"" src "\" alt=\"" alt "\" title=\"" title "\">"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }
    while (match(line, /!\[[^\]]*\]\([^\)]+\)/)) {
        start = RSTART; len = RLENGTH
        if (start > 1 && substr(line, start - 1, 1) == "\\") break
        token = substr(line, start, len)
        match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2)
        match(token, /\([^\)]+/); src = substr(token, RSTART + 1, RLENGTH - 1)
        repl = "<img src=\"" src "\" alt=\"" alt "\">"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }

    # inline link
    # A match preceded by "\" or "!" (an escaped link, or the tail of an
    # escaped image left over from above) is not converted; as with images,
    # the "break" also ends conversion of later links on this line.
    while (match(line, /\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) {
        start = RSTART; len = RLENGTH
        if (start > 1 && (substr(line, start - 1, 1) == "\\" || substr(line, start - 1, 1) == "!")) break
        token = substr(line, start, len)
        match(token, /\[[^\]]*\]/); text = substr(token, RSTART + 1, RLENGTH - 2)
        match(token, /"[^"]*"/); title = substr(token, RSTART + 1, RLENGTH - 2)
        match(token, /\([^\)]+/); inner = substr(token, RSTART + 1, RLENGTH - 1)
        sub(/[[:space:]]*"[^"]*"/, "", inner); href = inner
        repl = "<a href=\"" href "\" title=\"" title "\">" text "</a>"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }
    while (match(line, /\[[^\]]*\]\([^\)]+\)/)) {
        start = RSTART; len = RLENGTH
        if (start > 1 && (substr(line, start - 1, 1) == "\\" || substr(line, start - 1, 1) == "!")) break
        token = substr(line, start, len)
        match(token, /\[[^\]]*\]/); text = substr(token, RSTART + 1, RLENGTH - 2)
        match(token, /\([^\)]+/); href = substr(token, RSTART + 1, RLENGTH - 1)
        repl = "<a href=\"" href "\">" text "</a>"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }

    # MFM font syntax: $[font.NAME content]
    # The substr() offsets skip the "$[font.NAME " prefix (13, 17 and 12
    # characters respectively) and drop the closing "]".
    # NOTE(review): wrapper markup reconstructed; confirm the original tags.
    while (match(line, /\$\[font\.serif [^\]]+\]/)) {
        start = RSTART; len = RLENGTH
        content = substr(line, start + 13, len - 14)
        line = substr(line, 1, start - 1) "<span style=\"font-family: serif;\">" content "</span>" substr(line, start + len)
    }
    while (match(line, /\$\[font\.monospace [^\]]+\]/)) {
        start = RSTART; len = RLENGTH
        content = substr(line, start + 17, len - 18)
        line = substr(line, 1, start - 1) "<span style=\"font-family: monospace;\">" content "</span>" substr(line, start + len)
    }
    while (match(line, /\$\[font\.sans [^\]]+\]/)) {
        start = RSTART; len = RLENGTH
        content = substr(line, start + 12, len - 13)
        line = substr(line, 1, start - 1) "<span style=\"font-family: sans-serif;\">" content "</span>" substr(line, start + len)
    }

    # Hide all tags generated so far (and any raw HTML in the input) so the
    # emphasis rules below only see text.
    line = mask_html_tags(line)

    # Bold, Italic, Strikethrough
    # Strong Bold **
    while (match(line, /\*\*[^*]+\*\*/)) {
        start = RSTART; len = RLENGTH
        content = substr(line, start + 2, len - 4)
        repl = "<strong>" content "</strong>"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }
    # Strong Bold __
    while (match(line, /__[^_]+__/)) {
        start = RSTART; len = RLENGTH
        content = substr(line, start + 2, len - 4)
        repl = "<strong>" content "</strong>"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }
    # Italic *
    while (match(line, /\*[^*]+\*/)) {
        start = RSTART; len = RLENGTH
        content = substr(line, start + 1, len - 2)
        repl = "<em>" content "</em>"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }
    # Italic _
    while (match(line, /_[^_]+_/)) {
        start = RSTART; len = RLENGTH
        if (start > 1 && substr(line, start - 1, 1) == "\\") break
        content = substr(line, start + 1, len - 2)
        repl = "<em>" content "</em>"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }
    # Strikethrough ~~
    while (match(line, /~~[^~]+~~/)) {
        start = RSTART; len = RLENGTH
        content = substr(line, start + 2, len - 4)
        repl = "<del>" content "</del>"
        line = substr(line, 1, start - 1) repl substr(line, start + len)
    }

    line = restore_html_tags(line)

    # special characters
    # Ampersands are escaped only when the line contains no entity-looking
    # sequence at all, so existing entities are never double-escaped.
    if (line !~ /&[A-Za-z0-9#]+;/) {
        # "\\&" yields a literal "&" in the replacement text.
        gsub(/&/, "\\&amp;", line)
    }

    # Escape every "<" that does not start a tag (i.e. is not followed by a
    # letter or "/"). p is the 1-based position to resume scanning from.
    p = 1
    while (match(substr(line, p), /</)) {
        start = p + RSTART - 1
        next_char = substr(line, start + 1, 1)
        if (next_char !~ /^[\/A-Za-z]/) {
            line = substr(line, 1, start - 1) "&lt;" substr(line, start + 1)
            p = start + 4    # step over the 4 characters of "&lt;"
        } else {
            p = start + 1
        }
    }

    print line
}