From ef16ed4c88385f2bbf386ac8d9c22e4860cc106d Mon Sep 17 00:00:00 2001 From: "N0\\A" Date: Mon, 23 Mar 2026 11:39:05 +0100 Subject: [PATCH] feat: frontmatter --- awk/frontmatter.awk | 46 ++++++++++++ awk/headers.awk | 6 ++ awk/markdown_embed.awk | 33 +++++++-- awk/markdown_inline.awk | 36 +++++++++- kewt.sh | 152 ++++++++++++++++++++++++++++++++-------- markdown.sh | 7 +- site/index.md | 26 ++++++- 7 files changed, 268 insertions(+), 38 deletions(-) create mode 100644 awk/frontmatter.awk diff --git a/awk/frontmatter.awk b/awk/frontmatter.awk new file mode 100644 index 0000000..af3e6c3 --- /dev/null +++ b/awk/frontmatter.awk @@ -0,0 +1,46 @@ +BEGIN { + state = "start" +} +{ + if (state == "start") { + if ($0 == "---") { + state = "in_fm" + next + } else { + state = "body" + print + next + } + } + if (state == "in_fm") { + if ($0 == "---") { + state = "body" + next + } + line = $0 + if (line ~ /^[[:space:]]*$/ || line ~ /^[[:space:]]*#/) next + if (line !~ /=/) next + + key = line + val = line + sub(/=.*/, "", key) + sub(/[^=]*=/, "", val) + + gsub(/^[[:space:]]+|[[:space:]]+$/, "", key) + gsub(/^[[:space:]]+|[[:space:]]+$/, "", val) + + if (val ~ /^".*"$/) { + val = substr(val, 2, length(val) - 2) + gsub(/\\"/, "\"", val) + } else if (val ~ /^'.*'$/) { + val = substr(val, 2, length(val) - 2) + gsub(/\\'/, "'", val) + } + + if (fm_out != "") { + print key "=" val >> fm_out + } + next + } + print +} diff --git a/awk/headers.awk b/awk/headers.awk index 6362624..79eff45 100644 --- a/awk/headers.awk +++ b/awk/headers.awk @@ -3,8 +3,14 @@ function strip_markdown(s) { gsub(/[*_`~]/, "", s) gsub(/[\[\]]/, "", s) gsub(/\([^\)]*\)/, "", s) + s = tolower(s) + gsub(/[^a-z0-9 -]/, "", s) gsub(/^[[:space:]]+|[[:space:]]+$/, "", s) gsub(/[[:space:]]+/, "-", s) + gsub(/-{2,}/, "-", s) + gsub(/^-+|-+$/, "", s) + if (length(s) > 80) s = substr(s, 1, 80) + gsub(/-+$/, "", s) return s } function print_header(line) { diff --git a/awk/markdown_embed.awk b/awk/markdown_embed.awk index eef985a..75cc2a5 100644 --- a/awk/markdown_embed.awk +++ b/awk/markdown_embed.awk @@ -204,7 +204,7 @@ function render_embed(src, alt, has_alt, force_inline, ext, local_path, conte } if (is_audio_ext(ext)) return "" if (is_video_ext(ext)) return "" - return "" + return "" } if (is_image_ext(ext)) { @@ -223,7 +223,29 @@ function render_embed(src, alt, has_alt, force_inline, ext, local_path, conte } } - return "" + return "" +} + +function render_typed_embed(etype, src, alt, has_alt, local_path, content) { + if (etype == "i") { + if (has_alt) return "\""" + return "" + } + if (etype == "v") return "" + if (etype == "a") return "" + if (etype == "f") return "" + if (etype == "e") { + if (!is_global_url(src)) { + local_path = resolve_local_path(src) + if (local_path != "") { + content = read_file(local_path) + if (content ~ /\n$/) sub(/\n$/, "", content) + return content + } + } + return render_embed(src, alt, has_alt, 1) + } + return render_embed(src, alt, has_alt, 0) } function extract_attr(tag, attr, pat, m, token) { @@ -319,7 +341,7 @@ function apply_td_vertical_align(line, out, rest, seg, td_tag, img_tag, after return out rest } -function rewrite_img_tags(line, out, rest, tag, src, alt, force_inline_tag, pre, post, repl) { +function rewrite_img_tags(line, out, rest, tag, src, alt, force_inline_tag, embed_type, pre, post, repl) { out = "" rest = line while (match(rest, /]*\/?>/)) { @@ -329,7 +351,10 @@ function rewrite_img_tags(line, out, rest, tag, src, alt, force_inline_tag, p src = extract_attr(tag, "src") alt = extract_attr(tag, "alt") force_inline_tag = extract_attr(tag, "data-force-inline") - if (is_image_ext(ext_of(src)) && force_inline_tag == "") { + embed_type = extract_attr(tag, "data-embed-type") + if (embed_type != "") { + repl = render_typed_embed(embed_type, src, alt, (alt != "")) + } else if (is_image_ext(ext_of(src)) && force_inline_tag == "") { # Preserve hand-written attributes (style/class/etc) for normal images. repl = tag } else { diff --git a/awk/markdown_inline.awk b/awk/markdown_inline.awk index 7056f75..66e7ee7 100644 --- a/awk/markdown_inline.awk +++ b/awk/markdown_inline.awk @@ -20,9 +20,11 @@ function mask_html_tags(s, out, rest, start, len, tag, token) { return out rest } -function restore_html_tags(s, i) { +function restore_html_tags(s, i, val) { for (i = 1; i <= html_tag_count; i++) { - gsub(html_tag_token[i], html_tag_value[i], s) + val = html_tag_value[i] + gsub(/&/, "\\\\&", val) + gsub(html_tag_token[i], val, s) } return s } @@ -58,6 +60,36 @@ function restore_html_tags(s, i) { line = substr(line, 1, start - 1) repl substr(line, start + len) } + # typed embeds: !i, !v, !a, !f, !e + while (match(line, /![ivafe]\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) { + start = RSTART; len = RLENGTH + token = substr(line, start, len) + etype = substr(token, 2, 1) + match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2) + match(token, /"[^"]*"/); etitle = substr(token, RSTART + 1, RLENGTH - 2) + match(token, /\([^\)]+/); inner = substr(token, RSTART + 1, RLENGTH - 1) + sub(/[[:space:]]*"[^"]*"/, "", inner); src = inner + repl = "\""" + line = substr(line, 1, start - 1) repl substr(line, start + len) + } + while (match(line, /![ivafe]\[[^\]]*\]\([^\)]+\)/)) { + start = RSTART; len = RLENGTH + token = substr(line, start, len) + etype = substr(token, 2, 1) + match(token, /\[[^\]]*\]/); alt = substr(token, RSTART + 1, RLENGTH - 2) + match(token, /\([^\)]+/); src = substr(token, RSTART + 1, RLENGTH - 1) + repl = "\""" + line = substr(line, 1, start - 1) repl substr(line, start + len) + } + while (match(line, /![ivafe]\[[^\]]+\]/)) { + start = RSTART; len = RLENGTH + token = substr(line, start, len) + etype = substr(token, 2, 1) + src = substr(token, 4, len - 4) + repl = "" + line = substr(line, 1, start - 1) repl substr(line, start + len) + } + # force-inline image syntax (double bang) while (match(line, /!!\[[^\]]*\]\([^\)]+ "[^"]*"\)/)) { start = RSTART; len = RLENGTH diff --git a/kewt.sh b/kewt.sh index 5692799..916e539 100755 --- a/kewt.sh +++ b/kewt.sh @@ -107,6 +107,7 @@ EOF create_new_post() { post_src_dir="$1" + post_user_title="$2" target_dir="$post_src_dir" if [ -n "$posts_dir" ]; then @@ -126,7 +127,12 @@ create_new_post() { counter=$((counter + 1)) done - touch "$file_path" + post_date_val="$(date "+%Y-%m-%d %H:%M")" + if [ -n "$post_user_title" ]; then + printf -- '---\ntitle = "%s"\ndate = "%s"\ndraft = false\n---\n# %s\n' "$post_user_title" "$post_date_val" "$post_user_title" > "$file_path" + else + printf -- '---\ndate = "%s"\ndraft = false\n---\n' "$post_date_val" > "$file_path" + fi echo "Created new post at '$file_path'." exit 0 @@ -263,6 +269,10 @@ while [ $# -gt 0 ]; do ;; --post) post_mode="true" + if [ $# -gt 1 ] && [ "${2#-}" = "$2" ]; then + post_title="$2" + shift + fi ;; --update) update_dir="." @@ -512,7 +522,7 @@ if [ -n "$posts_dir" ]; then HIDE_ARGS="$HIDE_ARGS -o -path '$src/$posts_dir/*'" fi -[ "$post_mode" = "true" ] && create_new_post "$src" +[ "$post_mode" = "true" ] && create_new_post "$src" "$post_title" asset_version="" if [ "$versioning" = "true" ]; then @@ -534,6 +544,24 @@ escape_html_attr() { -e 's/>/\>/g' } +parse_frontmatter() { + _fm_file="$1" + _fm_out="$KEWT_TMPDIR/fm_vals.txt" + : > "$_fm_out" + awk -v fm_out="$_fm_out" -f "$awk_dir/frontmatter.awk" "$_fm_file" > /dev/null + fm_title="" + fm_date="" + fm_draft="" + while IFS='=' read -r _fk _fv; do + case "$_fk" in + title) fm_title="$_fv" ;; + date) fm_date="$_fv" ;; + draft) fm_draft="$_fv" ;; + esac + done < "$_fm_out" + rm -f "$_fm_out" +} + nav_links_html() { [ -n "$nav_links" ] || return @@ -718,8 +746,12 @@ render_markdown() { head_extra="" fi + parse_frontmatter "$file" + page_title="$title" - if [ "$generate_page_title" = "true" ] && [ -n "$file" ] && [ -f "$file" ]; then + if [ -n "$fm_title" ]; then + page_title="$fm_title - $title" + elif [ "$generate_page_title" = "true" ] && [ -n "$file" ] && [ -f "$file" ]; then if [ "$is_home" = "true" ] && [ -n "$home_name" ]; then page_title="$home_name - $title" else @@ -814,30 +846,69 @@ eval "find \"$src\" \( $IGNORE_ARGS \) -prune -o -type d -print" | sort | while elif [ "${entry%.md}" != "$entry" ]; then label="${name%.md}" - # Try to get first heading - post_h=$(grep -m 1 '^# ' "$entry" | sed 's/^# *//') - if [ -n "$post_h" ]; then - post_h=$(echo "$post_h" | sed -e 's/\[//g' -e 's/\]//g' -e 's/!//g' -e 's/\*//g' -e 's/_//g' -e 's/`//g' -e 's/([^)]*)//g' | sed 's/\\//g') + # Parse frontmatter for date/title/draft + parse_frontmatter "$entry" + [ "$fm_draft" = "true" ] && continue - if [ "$rel_dir" = "$posts_dir" ] || [ "./$rel_dir" = "$posts_dir" ]; then - # For posts add date and time + # Try to get first heading + post_h="$fm_title" + if [ -z "$post_h" ]; then + post_h=$(grep -m 1 '^# ' "$entry" | sed 's/^# *//') + if [ -n "$post_h" ]; then + post_h=$(echo "$post_h" | sed -e 's/\[//g' -e 's/\]//g' -e 's/!//g' -e 's/\*//g' -e 's/_//g' -e 's/`//g' -e 's/([^)]*)//g' | sed 's/\\//g') + fi + fi + + is_post_entry="false" + if [ "$rel_dir" = "$posts_dir" ] || [ "./$rel_dir" = "$posts_dir" ]; then + is_post_entry="true" + fi + + if [ -n "$post_h" ]; then + if [ "$is_post_entry" = "true" ]; then + # Use frontmatter date if available, else parse from filename + if [ -n "$fm_date" ]; then + p_date=$(echo "$fm_date" | sed 's/^\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\).*/\1/') + p_time="" + if echo "$fm_date" | grep -q '^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}[ T_-]\?[0-9]\{2\}[:\-][0-9]\{2\}'; then + p_time=$(echo "$fm_date" | sed 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}[ T_-]\?\([0-9]\{2\}[:\-][0-9]\{2\}\).*/\1/' | tr '-' ':') + fi + else + p_date=$(echo "${name%.md}" | sed 's/^\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\).*/\1/') + p_time="00:00" + if echo "${name%.md}" | grep -q '^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-[0-9]\{2\}[:\-][0-9]\{2\}'; then + p_time=$(echo "${name%.md}" | sed 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-\([0-9]\{2\}[:\-][0-9]\{2\}\).*/\1/' | tr '-' ':') + fi + fi + if [ -n "$p_time" ]; then + label="$post_h - $p_date $p_time" + else + label="$post_h - $p_date" + fi + else + label="$post_h" + fi + elif [ "$is_post_entry" = "true" ]; then + # No heading; use date + if [ -n "$fm_date" ]; then + p_date=$(echo "$fm_date" | sed 's/^\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\).*/\1/') + p_time="" + if echo "$fm_date" | grep -q '^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}[ T_-]\?[0-9]\{2\}[:\-][0-9]\{2\}'; then + p_time=$(echo "$fm_date" | sed 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}[ T_-]\?\([0-9]\{2\}[:\-][0-9]\{2\}\).*/\1/' | tr '-' ':') + fi + if [ -n "$p_time" ]; then + label="$p_date $p_time" + else + label="$p_date" + fi + else p_date=$(echo "${name%.md}" | sed 's/^\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\).*/\1/') p_time="00:00" if echo "${name%.md}" | grep -q '^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-[0-9]\{2\}[:\-][0-9]\{2\}'; then p_time=$(echo "${name%.md}" | sed 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-\([0-9]\{2\}[:\-][0-9]\{2\}\).*/\1/' | tr '-' ':') fi - label="$post_h - $p_date $p_time" - else - label="$post_h" + label="$p_date $p_time" fi - elif [ "$rel_dir" = "$posts_dir" ] || [ "./$rel_dir" = "$posts_dir" ]; then - # No heading and date and time for posts - p_date=$(echo "${name%.md}" | sed 's/^\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\).*/\1/') - p_time="00:00" - if echo "${name%.md}" | grep -q '^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-[0-9]\{2\}[:\-][0-9]\{2\}'; then - p_time=$(echo "${name%.md}" | sed 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-\([0-9]\{2\}[:\-][0-9]\{2\}\).*/\1/' | tr '-' ':') - fi - label="$p_date $p_time" fi echo "- [$label](${name%.md}.html)" >> "$temp_index" else @@ -884,6 +955,11 @@ eval "find \"$src\" \( $IGNORE_ARGS \) -prune -o -type f -print" | sort | while fi if [ "${file%.md}" != "$file" ] && [ "$is_preserved" -eq 0 ]; then + # Skip draft files + parse_frontmatter "$file" + if [ "$fm_draft" = "true" ]; then + continue + fi is_home="false"; [ "$file" = "$src/index.md" ] && is_home="true" out_file="$out/${rel_path%.md}.html" if needs_rebuild "$file" "$out_file"; then @@ -941,20 +1017,34 @@ if [ "$generate_feed" = "true" ] && [ -n "$base_url" ]; then printf ' %s\n' "$title" >> "$feed_path" printf ' %s\n' "$build_date" >> "$feed_path" - find "$src" -type f -name '[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]*.md' -print | LC_ALL=C sort -r | while IFS= read -r post_file; do + find "$src" -type f -name '*.md' -path "*${posts_dir:-__no_posts__}*" -print | LC_ALL=C sort -r | while IFS= read -r post_file; do post_basename=$(basename "$post_file" .md) - # Extract YYYY-MM-DD - post_date=$(echo "$post_basename" | sed 's/^\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\).*/\1/') - # Extract HH:MM if present (e.g., 2026-03-17-10:30 or 2026-03-17-10:30_1) - post_time="00:00" - if echo "$post_basename" | grep -q '^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-[0-9]\{2\}[:\-][0-9]\{2\}'; then - post_time=$(echo "$post_basename" | sed 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-\([0-9]\{2\}[:\-][0-9]\{2\}\).*/\1/' | tr '-' ':') + # Parse frontmatter + parse_frontmatter "$post_file" + [ "$fm_draft" = "true" ] && continue + + # Use frontmatter date, fallback to filename + if [ -n "$fm_date" ]; then + post_date=$(echo "$fm_date" | sed 's/^\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\).*/\1/') + post_time="00:00" + if echo "$fm_date" | grep -q '^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}[ T_-]\?[0-9]\{2\}[:\-][0-9]\{2\}'; then + post_time=$(echo "$fm_date" | sed 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}[ T_-]\?\([0-9]\{2\}[:\-][0-9]\{2\}\).*/\1/' | tr '-' ':') + fi + else + post_date=$(echo "$post_basename" | sed 's/^\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\).*/\1/') + post_time="00:00" + if echo "$post_basename" | grep -q '^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-[0-9]\{2\}[:\-][0-9]\{2\}'; then + post_time=$(echo "$post_basename" | sed 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-\([0-9]\{2\}[:\-][0-9]\{2\}\).*/\1/' | tr '-' ':') + fi fi post_slug=$(echo "$post_basename" | sed -e 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}-[0-9]\{2\}[:\-][0-9]\{2\}//' -e 's/^[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}//' -e 's/^[_\-]//') - post_heading=$(grep -m 1 '^# ' "$post_file" | sed 's/^# *//') + post_heading="$fm_title" + if [ -z "$post_heading" ]; then + post_heading=$(grep -m 1 '^# ' "$post_file" | sed 's/^# *//') + fi if [ -z "$post_heading" ]; then if [ -n "$post_slug" ] && ! echo "$post_slug" | grep -q '^[0-9]\+$'; then post_heading=$(echo "$post_slug" | sed 's/-/ /g' | awk '{for(i=1;i<=NF;i++) $i=toupper(substr($i,1,1)) substr($i,2)}1') @@ -963,7 +1053,7 @@ if [ "$generate_feed" = "true" ] && [ -n "$base_url" ]; then fi fi post_heading=$(echo "$post_heading" | sed -e 's/\[//g' -e 's/\]//g' -e 's/!//g' -e 's/\*//g' -e 's/_//g' -e 's/`//g' -e 's/([^)]*)//g' | sed 's/\\//g') - post_title="$post_heading - $post_date $post_time" + feed_post_title="$post_heading - $post_date $post_time" rel_path="${post_file#"$src"}" rel_path="${rel_path#/}" @@ -972,6 +1062,8 @@ if [ "$generate_feed" = "true" ] && [ -n "$base_url" ]; then pub_year=$(echo "$post_date" | cut -d- -f1) pub_month=$(echo "$post_date" | cut -d- -f2) pub_day=$(echo "$post_date" | cut -d- -f3) + # zero-padded + pub_day=$(printf '%02d' "${pub_day#0}") case "$pub_month" in 01) pub_mon="Jan" ;; 02) pub_mon="Feb" ;; 03) pub_mon="Mar" ;; 04) pub_mon="Apr" ;; 05) pub_mon="May" ;; 06) pub_mon="Jun" ;; @@ -981,7 +1073,7 @@ if [ "$generate_feed" = "true" ] && [ -n "$base_url" ]; then pub_date="${pub_day} ${pub_mon} ${pub_year} ${post_time}:00 +0000" printf ' \n' >> "$feed_path" - printf ' %s\n' "$post_title" >> "$feed_path" + printf ' %s\n' "$feed_post_title" >> "$feed_path" printf ' %s\n' "$post_url" >> "$feed_path" printf ' %s\n' "$post_url" >> "$feed_path" printf ' %s\n' "$pub_date" >> "$feed_path" diff --git a/markdown.sh b/markdown.sh index 4176c71..9dac41e 100755 --- a/markdown.sh +++ b/markdown.sh @@ -18,7 +18,12 @@ sed_inplace() { temp_file="${KEWT_TMPDIR:-/tmp}/markdown.$$.md" cat "$@" > "$temp_file" -trap 'rm -f "$temp_file" "$temp_file.tmp"' EXIT INT TERM +trap 'rm -f "$temp_file" "$temp_file.tmp" "$temp_file.fm"' EXIT INT TERM + +# Frontmatter +fm_file="$temp_file.fm" +: > "$fm_file" +awk -v fm_out="$fm_file" -f "$awk_dir/frontmatter.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file" # Mask awk -f "$awk_dir/mask_inline_code.awk" "$temp_file" > "$temp_file.tmp" && mv "$temp_file.tmp" "$temp_file" diff --git a/site/index.md b/site/index.md index 74361c7..e8a5f95 100644 --- a/site/index.md +++ b/site/index.md @@ -14,9 +14,11 @@ It's meant to be a static site generator, like _[kew](https://github.com/uint23/ ## Features - No dependencies +- Frontmatter support (title, date, draft) - Supports many embed types - Automatic css variable replacement for older browsers - Automatic inlining and embedding of many filetypes with `\![link]` or `\![alt](link)` +- Typed embeds: `\!i`, `\!v`, `\!a`, `\!f`, `\!e` - Inline html support - MFM `$font` and `\` tags - GFM Admonition support (that's what the blocks like the warning block below are called) @@ -96,7 +98,7 @@ bpkg install n0va-bot/kewt `--new [title]` creates a new site directory with a copied `site.conf` and a default `index.md`. -`--post` creates a new empty markdown file in the configured `posts_dir` with the current date and time as the name. +`--post [title]` creates a new markdown file in the configured `posts_dir` with the current date/time as the name and creates the default frontmatter. ### site.conf @@ -169,6 +171,28 @@ custom_admonitions = "" - other global links are embedded as `