commit d43b187b07817a06e827c2e4b95e5b8637b30174
parent adab1dee32d72c973747c77b54f6b440de4746e6
Author: St John Karp <contact@stjo.hn>
Date: Sun, 14 Jul 2019 16:04:01 -0500
Comment and document code
Wrote a readme file, and added comments throughout the code.
Diffstat:
D | entries.pl | | | 38 | -------------------------------------- |
A | generate_rss.pl | | | 69 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | helpers.pl | | | 13 | ++++++++++++- |
M | html.pl | | | 6 | ++++++ |
M | markdown.pl | | | 7 | ++++++- |
A | parse_entry.pl | | | 58 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | readme.md | | | 55 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | rss.pl | | | 55 | ++++++------------------------------------------------- |
M | tastic.sh | | | 28 | ++++++++++++++-------------- |
9 files changed, 226 insertions(+), 103 deletions(-)
diff --git a/entries.pl b/entries.pl
@@ -1,37 +0,0 @@
-:- include('helpers.pl').
-:- include('html.pl').
-:- include('markdown.pl').
-
-parse_entry:-
- read_file(user_input, HTML),
- parse_html(HTML).
-
-parse_entry(Filename):-
- open(Filename, read, Stream),
- read_file(Stream, HTML),
- close(Stream),
- parse_html(HTML).
-
-parse_html(HTML):-
- page(EntryCodes, Title, Subtitle, Date, HTML, []),
- markdown(EntryCodes, Title, Subtitle, Date, MarkdownCodes, []),
- atom_codes(Markdown, MarkdownCodes),
- write(Markdown),
- halt.
-
-generate_entry:-
- read_file(user_input, Entry),
- generate_html(Entry).
-
-generate_entry(Filename):-
- open(Filename, read, Stream),
- read_file(Stream, Entry),
- close(Stream),
- generate_html(Entry).
-
-generate_html(Markdown):-
- markdown(EntryCodes, Title, Subtitle, Date, Markdown, []),
- page(EntryCodes, Title, Subtitle, Date, HTMLCodes, []),
- atom_codes(HTML, HTMLCodes),
- write(HTML),
- halt.
-\ No newline at end of file
diff --git a/generate_rss.pl b/generate_rss.pl
@@ -0,0 +1,68 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: generate_rss.pl
+% Description: Predicates to generate an RSS file.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+:- include('helpers.pl').
+:- include('markdown.pl').
+:- include('rss.pl').
+
+% generate_rss(+BuildDate, +Filenames).
+% BuildDate is a list of character codes representing today's date (e.g. "2019-07-14").
+% Filenames is a list of atoms containing paths to all Markdown files with a date.
+% These files will be read, sorted by date, and used to generate an RSS of the most
+% recent posts.
+generate_rss(BuildDate, Filenames):-
+ % Read in all the files so we have their dates and contents.
+ files_to_articles(Filenames, Articles),
+ % Sort articles by date.
+ sort(Articles, SortedArticles),
+ % Grab the most recent 5.
+ take_last(5, SortedArticles, TakenArticles),
+ % Convert to RSS and write to stdout.
+ rss(BuildDate, TakenArticles, RSSCodes, []),
+ atom_codes(RSS, RSSCodes),
+ write(RSS),
+ halt.
+
+
+% files_to_articles(+Filenames, -Articles).
+% Read in each file as an article predicate.
+files_to_articles([], []).
+
+files_to_articles([Filename|Filenames], [article(Date, Title, Link, Description)|Articles]):-
+ open(Filename, read, Stream),
+ read_file(Stream, Markdown),
+ close(Stream),
+ % Grab the link.
+ get_link(Filename, Link),
+ % Extract the title, entry, etc. from the Markdown.
+ markdown(Entry, Title, _, Date, Markdown, []),
+ % XML escape the description.
+ replace("&", "&amp;", Entry, EntryAmp),
+ replace("<", "&lt;", EntryAmp, EntryLT),
+ replace(">", "&gt;", EntryLT, Description),
+ files_to_articles(Filenames, Articles).
+
+
+% get_link(?Filename, ?Link).
+% Calculate a file's URL, given its current path.
+get_link(Filename, Link):-
+ atom_codes(Filename, FilenameCodes),
+ % Just assert that this is an index file before we go further.
+ % Backtracking after this point will take us down a rabbit hole.
+ append(_, "index.md", FilenameCodes),
+ site_url(URL, []),
+ append(_, "/source", StartPath),
+ append(StartPath, Path, FilenameCodes),
+ append(PathWithoutFile, "index.md", Path),
+ append(URL, PathWithoutFile, Link).
+
+get_link(Filename, Link):-
+ atom_codes(Filename, FilenameCodes),
+ site_url(URL, []),
+ append(_, "/source", StartPath),
+ append(StartPath, Path, FilenameCodes),
+ append(PathWithoutExtension, ".md", Path),
+ append(PathWithoutExtension, "/", PathWithSlash),
+ append(URL, PathWithSlash, Link).
+\ No newline at end of file
diff --git a/helpers.pl b/helpers.pl
@@ -1,5 +1,10 @@
-% Helpers
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: helpers.pl
+% Description: Misc. utility predicates.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% read_file(+Stream, -Codes).
+% Read a file to a list of character codes.
read_file(Stream, []):-
at_end_of_stream(Stream).
@@ -9,6 +14,8 @@ read_file(Stream, [Code|Rest]):-
read_file(Stream, Rest).
+% take_last(+Max, +List, -Results).
+% Return the last Max elements of List.
take_last(_, [], []).
take_last(Max, [First|Rest], Result):-
@@ -20,6 +27,10 @@ take_append(Max, _, ResultSoFar, ResultSoFar):-
take_append(_, Item, ResultSoFar, [Item|ResultSoFar]).
+
+% replace(+FindCodes, +ReplaceCodes, +Haystack, -Result).
+% Find instances of FindCodes in Haystack and replace with ReplaceCodes.
+% All four arguments are lists of character codes.
replace(_, _, [], []).
replace(FindCodes, ReplaceCodes, Haystack, Result):-
diff --git a/html.pl b/html.pl
@@ -1,3 +1,9 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: html.pl
+% Description: DCG definition of an HTML file.
+% This is basically your static website's template.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
page(Entry, Title, Subtitle, Date) -->
doctype,
whitespace,
diff --git a/markdown.pl b/markdown.pl
@@ -1,4 +1,9 @@
-% Markdown definition
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: markdown.pl
+% Description: DCG definition of a Markdown file.
+% Markdown files may have no metadata at the start,
+% or they may have a Title, Subtitle, and Date (all optional, but in that order).
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
markdown(Entry, Title, Subtitle, Date) -->
metadata("Title", Title),
diff --git a/parse_entry.pl b/parse_entry.pl
@@ -0,0 +1,57 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: parse_entry.pl
+% Description: Predicates to generate and parse a static site's Markdown/HTML.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+:- include('helpers.pl').
+:- include('html.pl').
+:- include('markdown.pl').
+
+% parse_entry.
+% Read in an HTML file from stdin.
+parse_entry:-
+ read_file(user_input, HTML),
+ parse_html(HTML).
+
+% parse_entry(+Filename).
+% Read in an HTML file from Filename.
+parse_entry(Filename):-
+ open(Filename, read, Stream),
+ read_file(Stream, HTML),
+ close(Stream),
+ parse_html(HTML).
+
+
+% parse_html(+HTML).
+% Parse HTML into a Markdown file and write to stdout.
+parse_html(HTML):-
+ page(EntryCodes, Title, Subtitle, Date, HTML, []),
+ markdown(EntryCodes, Title, Subtitle, Date, MarkdownCodes, []),
+ atom_codes(Markdown, MarkdownCodes),
+ write(Markdown),
+ halt.
+
+
+% generate_entry.
+% Read in a Markdown file from stdin.
+generate_entry:-
+ read_file(user_input, Entry),
+ generate_html(Entry).
+
+% generate_entry(Filename).
+% Read in a Markdown file from Filename.
+generate_entry(Filename):-
+ open(Filename, read, Stream),
+ read_file(Stream, Entry),
+ close(Stream),
+ generate_html(Entry).
+
+
+% generate_html(Markdown).
+% Parse Markdown into an HTML file and write to stdout.
+generate_html(Markdown):-
+ markdown(EntryCodes, Title, Subtitle, Date, Markdown, []),
+ page(EntryCodes, Title, Subtitle, Date, HTMLCodes, []),
+ atom_codes(HTML, HTMLCodes),
+ write(HTML),
+ halt.
+\ No newline at end of file
diff --git a/readme.md b/readme.md
@@ -0,0 +1,54 @@
+# Tastic
+
+A static site generator in Prolog (mostly).
+
+## What is this?
+
+A few months ago I lost the source files I used to generate my static website. Fortunately there was no irreparable data loss because I still had the generated site up on my server. The problem was now I needed to write a script that would extract all the articles into source files again, and then reconfigure the site generator. Then I went, "Oh. This is a Prolog problem." I figured if I could write a Prolog program that described my HTML template then I could use the same code both to un-generate and re-generate the website, because a Prolog program is basically a set of rules and the logic can be run in either direction. (But then I love Prolog so every problem is a Prolog problem but I don't care. Fight me.)
+
+So the skinny is I wound up writing my own static website generator in Prolog. Well, the main components are in Prolog. I also wrote a bash script to make use of a bunch of common \*nix utilities (find, sed, grep, etc.) and to pipe output to some third-party programs where I needed them (HTML Tidy and it's still TBD, but possibly Pandoc in the future). Weirdest bit was that I just couldn't find anything decent to generate RSS feeds. I considered dropping the RSS altogether, but I've spent enough time haranguing people for not supporting interoperable standards that I didn't want to be a hypocrite. I wound up writing my own RSS generator too, also in Prolog.
+
+It's all reeeeeally alpha and is pretty closely tailored to my specific needs, but it works, and IMHO it works better than my old site generator which injected a bunch of nonsense into my HTML.
+
+## Dependencies
+
+* Bash. Used to run the script that automates everything else.
+* A Prolog interpreter. Tested with [SWI-Prolog](https://www.swi-prolog.org/), but the syntax should be vanilla ISO Prolog and should work with any implementation.
+* [HTML Tidy](http://www.html-tidy.org/). Used to format the HTML output nicely — not something I relished doing in Prolog.
+* [Smartypants](https://github.com/leohemsted/smartypants.py) located at ~/.local/bin/smartypants. Used to smarten the punctuation in the HTML output.
+
+## Assumptions
+
+The website folder used in the second argument is expected to contain three things:
+
+* a "source" folder containing the website's source;
+* an "output" folder containing the website's static output;
+* a "site.pl" file containing site-specific definitions.
+
+One or the other of the "source" and "output" folders must be populated, but not necessarily both.
+
+site.pl contains DCG definitions of this site's specifics, such as title, author, etc. An example site.pl file might look like this:
+
+ site_title --> "My website name".
+
+ site_subtitle --> "My website description/subtitle".
+
+ site_url --> "https://www.example.com".
+
+ email --> "webmaster@example.com".
+
+ name --> "Harold Gruntfuttock".
+
+## Use
+
+Generate a static website from Markdown sources:
+
+ ./tastic.sh generate /home/user/website
+
+Generate source files from a static website:
+
+ ./tastic.sh ungenerate /home/user/website
+
+## Still to do
+
+The source Markdown files are currently assumed to be plain HTML with a Markdown header containing metadata. I'm going to need something to convert proper Markdown to HTML, so I'll probably add Pandoc as a dependency to tastic.sh. I expect this will also replace Smartypants for doing smart punctuation.
+\ No newline at end of file
diff --git a/rss.pl b/rss.pl
@@ -1,50 +1,7 @@
-:- include('helpers.pl').
-:- include('markdown.pl').
-
-generate_rss(BuildDate, Filenames):-
- files_to_articles(Filenames, Articles),
- sort(Articles, SortedArticles),
- take_last(5, SortedArticles, TakenArticles),
- rss(BuildDate, TakenArticles, RSSCodes, []),
- atom_codes(RSS, RSSCodes),
- write(RSS),
- halt.
-
-files_to_articles([], []).
-
-files_to_articles([Filename|Filenames], [article(Date, Title, Link, Description)|Articles]):-
- open(Filename, read, Stream),
- read_file(Stream, Markdown),
- close(Stream),
- % Grab the link.
- get_link(Filename, Link),
- % Extract the title, entry, etc. from the Markdown.
- markdown(Entry, Title, _, Date, Markdown, []),
- % XML escape the description.
- replace("&", "&amp;", Entry, EntryAmp),
- replace("<", "&lt;", EntryAmp, EntryLT),
- replace(">", "&gt;", EntryLT, Description),
- files_to_articles(Filenames, Articles).
-
-get_link(Filename, Link):-
- atom_codes(Filename, FilenameCodes),
- % Just assert that this is an index file before we go further.
- % Backtracking after this point will take us down a rabbit hole.
- append(_, "index.md", FilenameCodes),
- site_url(URL, []),
- append(_, "/source", StartPath),
- append(StartPath, Path, FilenameCodes),
- append(PathWithoutFile, "index.md", Path),
- append(URL, PathWithoutFile, Link).
-
-get_link(Filename, Link):-
- atom_codes(Filename, FilenameCodes),
- site_url(URL, []),
- append(_, "/source", StartPath),
- append(StartPath, Path, FilenameCodes),
- append(PathWithoutExtension, ".md", Path),
- append(PathWithoutExtension, "/", PathWithSlash),
- append(URL, PathWithSlash, Link).
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: rss.pl
+% Description: DCG definition of an RSS file.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rss(BuildDate, Articles) -->
rss_open,
@@ -115,7 +72,7 @@ webmaster -->
last_build_date(BuildDate) -->
"<lastBuildDate>",
- BuildDate,
+ anything(BuildDate),
"</lastBuildDate>".
items([]) --> [].
@@ -146,7 +103,7 @@ author -->
pubdate(Date) -->
"<pubDate>",
- Date,
+ anything(Date),
"</pubDate>".
item_close --> "</item>".
diff --git a/tastic.sh b/tastic.sh
@@ -8,17 +8,17 @@ SITE_PATH=$2
if [ "$1" == "ungenerate" ]
then
# Create the directory structure.
- rm -rf $SITE_PATH/$SOURCE_DIR/*
- find $SITE_PATH/$OUTPUT_DIR -type d |
+ rm -rf "$SITE_PATH"/"$SOURCE_DIR"/*
+ find "$SITE_PATH"/"$OUTPUT_DIR" -type d |
sed "s|^$SITE_PATH/$OUTPUT_DIR|$SITE_PATH/$SOURCE_DIR|" |
xargs -0 -d '\n' mkdir -p --
# Parse and create all the markdown files.
- find $SITE_PATH/$OUTPUT_DIR -type f -name "*.html" -print0 |
+ find "$SITE_PATH"/"$OUTPUT_DIR" -type f -name "*.html" -print0 |
while IFS= read -r -d '' file; do
NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$OUTPUT_DIR|$SITE_PATH/$SOURCE_DIR|" | sed 's|.html$|.md|'`
cat "$file" |
- swipl --traditional -q -l entries.pl -g "consult('$SITE_PATH/site.pl'), parse_entry." |
+ swipl --traditional -q -l parse_entry.pl -g "consult('$SITE_PATH/site.pl'), parse_entry." |
# Unsmarten the punctuation.
sed "s|&nbsp;| |g" |
sed "s|‘|'|g" |
@@ -29,7 +29,7 @@ then
done
# Copy anything else directly.
- find $SITE_PATH/$OUTPUT_DIR -type f -not -name "*.html" -print0 |
+ find "$SITE_PATH"/"$OUTPUT_DIR" -type f -not -name "*.html" -print0 |
while IFS= read -r -d '' file; do
NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$OUTPUT_DIR|$SITE_PATH/$SOURCE_DIR|"`
cp "$file" "$NEW_PATH"
@@ -37,37 +37,37 @@ then
elif [ "$1" == "generate" ]
then
# Create the directory structure.
- rm -rf $SITE_PATH/$OUTPUT_DIR/*
- find $SITE_PATH/$SOURCE_DIR -type d |
+ rm -rf "$SITE_PATH"/"$OUTPUT_DIR"/*
+ find "$SITE_PATH"/"$SOURCE_DIR" -type d |
sed "s|^$SITE_PATH/$SOURCE_DIR|$SITE_PATH/$OUTPUT_DIR|" |
xargs -0 -d '\n' mkdir -p --
# Parse and create all the HTML files.
- find $SITE_PATH/$SOURCE_DIR -type f -name "*.md" -print0 |
+ find "$SITE_PATH"/"$SOURCE_DIR" -type f -name "*.md" -print0 |
while IFS= read -r -d '' file; do
echo $file
NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$SOURCE_DIR|$SITE_PATH/$OUTPUT_DIR|" | sed 's|.md$|.html|'`
cat "$file" |
- swipl --traditional -q -l entries.pl -g "consult('$SITE_PATH/site.pl'), generate_entry." |
+ swipl --traditional -q -l parse_entry.pl -g "consult('$SITE_PATH/site.pl'), generate_entry." |
tidy -quiet --indent auto --indent-with-tabs yes --wrap 0 -asxml --tidy-mark no |
~/.local/bin/smartypants \
> "$NEW_PATH"
done
# Copy anything else directly.
- find $SITE_PATH/$SOURCE_DIR -type f -not -name "*.md" -print0 |
+ find "$SITE_PATH"/"$SOURCE_DIR" -type f -not -name "*.md" -print0 |
while IFS= read -r -d '' file; do
NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$SOURCE_DIR|$SITE_PATH/$OUTPUT_DIR|"`
cp "$file" "$NEW_PATH"
done
# Generate the RSS feed.
- mkdir -p $SITE_PATH/$OUTPUT_DIR/feeds
- ARTICLES=`grep -Rl --include=\*.md "^Date: " $SITE_PATH/$SOURCE_DIR | paste -sd ',' - | sed "s|,|','|g"`
+ mkdir -p "$SITE_PATH"/"$OUTPUT_DIR"/feeds
+ ARTICLES=`grep -Rl --include=\*.md "^Date: " "$SITE_PATH"/"$SOURCE_DIR" | paste -sd ',' - | sed "s|,|','|g"`
BUILD_DATE=`date +"%Y-%m-%d %T"`
- swipl --traditional -q -l rss.pl -g "consult('$SITE_PATH/site.pl'), generate_rss(\"$BUILD_DATE\", ['$ARTICLES'])." |
+ swipl --traditional -q -l generate_rss.pl -g "consult('$SITE_PATH/site.pl'), generate_rss(\"$BUILD_DATE\", ['$ARTICLES'])." |
tidy -quiet --indent auto --indent-with-tabs yes --wrap 0 -xml --tidy-mark no \
- > $SITE_PATH/$OUTPUT_DIR/feeds/rss.xml
+ > "$SITE_PATH"/"$OUTPUT_DIR"/feeds/rss.xml
else
echo "Invalid argument."
exit 1