From 1a1e72e6e96534018604380c44e4015fedf9c3fb Mon Sep 17 00:00:00 2001 From: Amber Date: Tue, 1 Mar 2022 20:07:41 -0600 Subject: [PATCH] initial commit --- markdown.sh | 368 +++++++++++++++++++++++++++++++++++++++++++++++++++ outline | 12 ++ requirements | 4 + 3 files changed, 384 insertions(+) create mode 100755 markdown.sh create mode 100644 outline create mode 100644 requirements diff --git a/markdown.sh b/markdown.sh new file mode 100755 index 0000000..f310895 --- /dev/null +++ b/markdown.sh @@ -0,0 +1,368 @@ +#!/bin/bash + +< "$temp_file" + +# All of this below business is for reference-style links and images +# We need to loop across newlines and not spaces +IFS=' +' +refs=$(sed -nr "/^\[.+\]: +/p" "$@") +for ref in $refs +do + ref_id=$(echo -n "$ref" | sed -nr "s/^\[(.+)\]: .*/\1/p" | tr -d '\n') + ref_url=$(echo -n "$ref" | sed -nr "s/^\[.+\]: (.+)/\1/p" | cut -d' ' -f1 | tr -d '\n') + ref_title=$(echo -n "$ref" | sed -nr "s/^\[.+\]: (.+) \"(.+)\"/\2/p" | sed 's@|@!@g' | tr -d '\n') + + # reference-style image using the label + sed -ri "s|!\[([^]]+)\]\[($ref_id)\]|\"\1\"|gI" "$temp_file" + # reference-style link using the label + sed -ri "s|\[([^]]+)\]\[($ref_id)\]|\1|gI" "$temp_file" + + # implicit reference-style + sed -ri "s|!\[($ref_id)\]\[\]|\"\1\"|gI" "$temp_file" + # implicit reference-style + sed -ri "s|\[($ref_id)\]\[\]|\1|gI" "$temp_file" +done + +# delete the reference lines +sed -ri "/^\[.+\]: +/d" "$temp_file" + +# blockquotes +# use grep to find all the nested blockquotes +while grep '^> ' "$temp_file" >/dev/null +do + sed -nri ' +/^$/b blockquote + +H +$ b blockquote +b + +:blockquote +x +s/(\n+)(> .*)/\1
\n\2\n<\/blockquote>/ # wrap the tags in a blockquote +p +' "$temp_file" + + sed -i '1 d' "$temp_file" # cleanup superfluous first line + + # cleanup blank lines and remove subsequent blockquote characters + sed -ri ' +/^> /s/^> (.*)/\1/ +' "$temp_file" +done + +# Setext-style headers +sed -nri ' +# Setext-style headers need to be wrapped around newlines +/^$/ b print + +# else, append to holding area +H +$ b print +b + +:print +x +/=+$/{ +s/\n(.*)\n=+$/\n

\1<\/h1>/ +p +b +} +/\-+$/{ +s/\n(.*)\n\-+$/\n

\1<\/h2>/ +p +b +} +p +' "$temp_file" + +sed -i '1 d' "$temp_file" # cleanup superfluous first line + +# atx-style headers and other block styles +sed -ri ' +/^#+ /s/ #+$// # kill all ending header characters +/^# /s/# ([A-Za-z0-9 ]*)(.*)/

\1\2<\/h1>/g # H1 +/^#{2} /s/#{2} ([A-Za-z0-9 ]*)(.*)/

\1\2<\/h2>/g # H2 +/^#{3} /s/#{3} ([A-Za-z0-9 ]*)(.*)/

\1\2<\/h3>/g # H3 +/^#{4} /s/#{4} ([A-Za-z0-9 ]*)(.*)/

\1\2<\/h4>/g # H4 +/^#{5} /s/#{5} ([A-Za-z0-9 ]*)(.*)/

\1\2<\/h5>/g # H5 +/^#{6} /s/#{6} ([A-Za-z0-9 ]*)(.*)/
\1\2<\/h6>/g # H6 + +/^\*\*\*+$/s/\*\*\*+/
/ # hr with * +/^---+$/s/---+/
/ # hr with - +/^___+$/s/___+/
/ # hr with _ + +' "$temp_file" + +# unordered lists +# use grep to find all the nested lists +while grep '^[\*\+\-] ' "$temp_file" >/dev/null +do +sed -nri ' +# wrap the list +/^$/b list + +# wrap the li tags then add to the hold buffer +# use uli instead of li to avoid collisions when processing nested lists +/^[\*\+\-] /s/[\*\+\-] (.*)/<\/uli>\n\n\1/ + +H +$ b list # if at end of file, check for the end of a list +b # else, branch to the end of the script + +# this is where a list is checked for the pattern +:list +# exchange the hold space into the pattern space +x +# look for the list items, if there wrap the ul tags +//{ +s/(.*)/\n
    \1\n<\/uli>\n<\/ul>/ # close the ul tags +s/\n<\/uli>// # kill the first superfluous closing tag +p +b +} +p +' "$temp_file" + +sed -i '1 d' "$temp_file" # cleanup superfluous first line + +# convert to the proper li to avoid collisions with nested lists +sed -i 's/uli>/li>/g' "$temp_file" + +# prepare any nested lists +sed -ri '/^[\*\+\-] /s/(.*)/\n\1\n/' "$temp_file" +done + +# ordered lists +# use grep to find all the nested lists +while grep -E '^[1-9]+\. ' "$temp_file" >/dev/null +do +sed -nri ' +# wrap the list +/^$/b list + +# wrap the li tags then add to the hold buffer +# use oli instead of li to avoid collisions when processing nested lists +/^[1-9]+\. /s/[1-9]+\. (.*)/<\/oli>\n\n\1/ + +H +$ b list # if at end of file, check for the end of a list +b # else, branch to the end of the script + +:list +# exchange the hold space into the pattern space +x +# look for the list items, if there wrap the ol tags +//{ +s/(.*)/\n
      \1\n<\/oli>\n<\/ol>/ # close the ol tags +s/\n<\/oli>// # kill the first superfluous closing tag +p +b +} +p +' "$temp_file" + +sed -i '1 d' "$temp_file" # cleanup superfluous first line + +# convert list items into proper list items to avoid collisions with nested lists +sed -i 's/oli>/li>/g' "$temp_file" + +# prepare any nested lists +sed -ri '/^[1-9]+\. /s/(.*)/\n\1\n/' "$temp_file" +done + +# make escaped periods literal +sed -ri '/^[1-9]+\\. /s/([1-9]+)\\. /\1\. /' "$temp_file" + + +# code blocks +sed -nri ' +# if at end of file, append the current line to the hold buffer and print it +${ +H +b code +} + +# wrap the code block on any non code block lines +/^\t| {4}/!b code + +# else, append to the holding buffer and do nothing +H +b # else, branch to the end of the script + +:code +# exchange the hold space with the pattern space +x +# look for the code items, if there wrap the pre-code tags +/\t| {4}/{ +s/(\t| {4})(.*)/
      \n\1\2\n<\/code><\/pre>/ # wrap the ending tags
      +p
      +b
      +}
      +p
      +' "$temp_file"
      +
      +sed -i '1 d' "$temp_file" # cleanup superfluous first line
      +
      +# convert html characters inside pre-code tags into printable representations
      +sed -ri '
      +# get inside pre-code tags
      +/^
      /{
      +:inside
      +n
      +# if you found the end tags, branch out
      +/^<\/code><\/pre>/!{
      +s/&/\&/g # ampersand
      +s//\>/g # greater than
      +b inside
      +}
      +}
      +' "$temp_file"
      +
      +# remove the first tab (or 4 spaces) from the code lines
      +sed -ri 's/^\t| {4}(.*)/\1/' "$temp_file"
      +
      +# br tags
      +sed -ri '
      +# if an empty line, append it to the next line, then check on whether there is two in a row
      +/^$/ {
      +N
      +N
      +/^\n{2}/s/(.*)/\n
      \1/ +} +' "$temp_file" + +# emphasis and strong emphasis and strikethrough +sed -nri ' +# batch up the entire stream of text until a line break in the action +/^$/b emphasis + +H +$ b emphasis +b + +:emphasis +x +s/\*\*(.+)\*\*/\1<\/strong>/g +s/__([^_]+)__/\1<\/strong>/g +s/\*([^\*]+)\*/\1<\/em>/g +s/([^\\])_([^_]+)_/\1\2<\/em>/g +s/\~\~(.+)\~\~/\1<\/strike>/g +p +' "$temp_file" + +sed -i '1 d' "$temp_file" # cleanup superfluous first line + +# paragraphs +sed -nri ' +# if an empty line, check the paragraph +/^$/ b para +# else append it to the hold buffer +H +# at end of file, check paragraph +$ b para +# now branch to end of script +b +# this is where a paragraph is checked for the pattern +:para +# return the entire paragraph into the pattern space +x +# look for non block-level elements, if there - print the p tags +/\n<(div|table|pre|p|[ou]l|h[1-6]|[bh]r|blockquote|li)/!{ +s/(\n+)(.*)/\1

      \n\2\n<\/p>/ +p +b +} +p +' "$temp_file" + +sed -i '1 d' "$temp_file" # cleanup superfluous first line + +# cleanup area where P tags have broken nesting +sed -nri ' +# if the line looks like like an end tag +/^<\/(div|table|pre|p|[ou]l|h[1-6]|[bh]r|blockquote)>/{ +h +# if EOF, print the line +$ { +x +b done +} +# fetch the next line and check on whether or not it is a P tag +n +/^<\/p>/{ +G +b done +} +# else, append the line to the previous line and print them both +H +x +} +:done +p +' "$temp_file" + +# inline styles and special characters +sed -ri ' +s/<(http[s]?:\/\/.*)>/\1<\/a>/g # automatic links +s/<(.*@.*\..*)>/\1<\/a>/g # automatic email address links + +# inline code +s/([^\\])``+ *([^ ]*) *``+/\1\2<\/code>/g +s/([^\\])`([^`]*)`/\1\2<\/code>/g + +s/!\[(.*)\]\((.*) \"(.*)\"\)/\"\1\"/g # inline image with title +s/!\[(.*)\]\((.*)\)/\"\1\"/g # inline image without title + +s/\[(.*)]\((.*) "(.*)"\)/\1<\/a>/g # inline link with title +s/\[(.*)]\((.*)\)/\1<\/a>/g # inline link + +# special characters +/&.+;/!s/&/\&/g # ampersand +/<[\/a-zA-Z]/!s/