commit d43b187b07817a06e827c2e4b95e5b8637b30174
parent adab1dee32d72c973747c77b54f6b440de4746e6
Author: St John Karp <contact@stjo.hn>
Date: Sun, 14 Jul 2019 16:04:01 -0500
Comment and document code
Wrote a readme file, and added comments throughout the code.
Diffstat:
D | entries.pl | | | 38 | -------------------------------------- |
A | generate_rss.pl | | | 69 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | helpers.pl | | | 13 | ++++++++++++- |
M | html.pl | | | 6 | ++++++ |
M | markdown.pl | | | 7 | ++++++- |
A | parse_entry.pl | | | 58 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | readme.md | | | 55 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | rss.pl | | | 55 | ++++++------------------------------------------------- |
M | tastic.sh | | | 28 | ++++++++++++++-------------- |
9 files changed, 226 insertions(+), 103 deletions(-)
diff --git a/entries.pl b/entries.pl
@@ -1,37 +0,0 @@
-:- include('helpers.pl').
-:- include('html.pl').
-:- include('markdown.pl').
-
-parse_entry:-
- read_file(user_input, HTML),
- parse_html(HTML).
-
-parse_entry(Filename):-
- open(Filename, read, Stream),
- read_file(Stream, HTML),
- close(Stream),
- parse_html(HTML).
-
-parse_html(HTML):-
- page(EntryCodes, Title, Subtitle, Date, HTML, []),
- markdown(EntryCodes, Title, Subtitle, Date, MarkdownCodes, []),
- atom_codes(Markdown, MarkdownCodes),
- write(Markdown),
- halt.
-
-generate_entry:-
- read_file(user_input, Entry),
- generate_html(Entry).
-
-generate_entry(Filename):-
- open(Filename, read, Stream),
- read_file(Stream, Entry),
- close(Stream),
- generate_html(Entry).
-
-generate_html(Markdown):-
- markdown(EntryCodes, Title, Subtitle, Date, Markdown, []),
- page(EntryCodes, Title, Subtitle, Date, HTMLCodes, []),
- atom_codes(HTML, HTMLCodes),
- write(HTML),
- halt.
-\ No newline at end of file
diff --git a/generate_rss.pl b/generate_rss.pl
@@ -0,0 +1,68 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: generate_rss.pl
+% Description: Predicates to generate an RSS file.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+:- include('helpers.pl').
+:- include('markdown.pl').
+:- include('rss.pl').
+
+% generate_rss(+BuildDate, +Filenames).
+% BuildDate is a list of character codes representing today's date (e.g. "2019-07-14").
+% Filenames is a list of atoms containing paths to all Markdown files with a date.
+% These files will be read, sorted by date, and used to generate an RSS of the most
+% recent posts.
+generate_rss(BuildDate, Filenames):-
+ % Read in all the files so we have their dates and contents.
+ files_to_articles(Filenames, Articles),
+ % Sort articles by date.
+ sort(Articles, SortedArticles),
+ % Grab the most recent 5.
+ take_last(5, SortedArticles, TakenArticles),
+ % Convert to RSS and write to stdout.
+ rss(BuildDate, TakenArticles, RSSCodes, []),
+ atom_codes(RSS, RSSCodes),
+ write(RSS),
+ halt.
+
+
+% files_to_articles(+Filenames, -Articles).
+% Read in each file as an article predicate.
+files_to_articles([], []).
+
+files_to_articles([Filename|Filenames], [article(Date, Title, Link, Description)|Articles]):-
+ open(Filename, read, Stream),
+ read_file(Stream, Markdown),
+ close(Stream),
+ % Grab the link.
+ get_link(Filename, Link),
+ % Extract the title, entry, etc. from the Markdown.
+ markdown(Entry, Title, _, Date, Markdown, []),
+ % XML escape the description.
+ replace("&", "&amp;", Entry, EntryAmp),
+ replace("<", "&lt;", EntryAmp, EntryLT),
+ replace(">", "&gt;", EntryLT, Description),
+ files_to_articles(Filenames, Articles).
+
+
+% get_link(?Filename, ?Link).
+% Calculate a file's URL, given its current path.
+get_link(Filename, Link):-
+ atom_codes(Filename, FilenameCodes),
+ % Just assert that this is an index file before we go further.
+ % Backtracking after this point will take us down a rabbit hole.
+ append(_, "index.md", FilenameCodes),
+ site_url(URL, []),
+ append(_, "/source", StartPath),
+ append(StartPath, Path, FilenameCodes),
+ append(PathWithoutFile, "index.md", Path),
+ append(URL, PathWithoutFile, Link).
+
+get_link(Filename, Link):-
+ atom_codes(Filename, FilenameCodes),
+ site_url(URL, []),
+ append(_, "/source", StartPath),
+ append(StartPath, Path, FilenameCodes),
+ append(PathWithoutExtension, ".md", Path),
+ append(PathWithoutExtension, "/", PathWithSlash),
+ append(URL, PathWithSlash, Link).
+\ No newline at end of file
diff --git a/helpers.pl b/helpers.pl
@@ -1,5 +1,10 @@
-% Helpers
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: helpers.pl
+% Description: Misc. utility predicates.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% read_file(+Stream, -Codes).
+% Read a file to a list of character codes.
read_file(Stream, []):-
at_end_of_stream(Stream).
@@ -9,6 +14,8 @@ read_file(Stream, [Code|Rest]):-
read_file(Stream, Rest).
+% take_last(+Max, +List, -Results).
+% Return the last Max elements of List.
take_last(_, [], []).
take_last(Max, [First|Rest], Result):-
@@ -20,6 +27,10 @@ take_append(Max, _, ResultSoFar, ResultSoFar):-
take_append(_, Item, ResultSoFar, [Item|ResultSoFar]).
+
+% replace(+FindCodes, +ReplaceCodes, +Haystack, -Result).
+% Find instances of FindCodes in Haystack and replace with ReplaceCodes.
+% All four arguments are lists of character codes.
replace(_, _, [], []).
replace(FindCodes, ReplaceCodes, Haystack, Result):-
diff --git a/html.pl b/html.pl
@@ -1,3 +1,9 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: html.pl
+% Description: DCG definition of an HTML file.
+% This is basically your static website's template.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
page(Entry, Title, Subtitle, Date) -->
doctype,
whitespace,
diff --git a/markdown.pl b/markdown.pl
@@ -1,4 +1,9 @@
-% Markdown definition
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: markdown.pl
+% Description: DCG definition of a Markdown file.
+% Markdown files may have no metadata at the start,
+% or they may have a Title, Subtitle, and Date (all optional, but in that order).
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
markdown(Entry, Title, Subtitle, Date) -->
metadata("Title", Title),
diff --git a/parse_entry.pl b/parse_entry.pl
@@ -0,0 +1,57 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: parse_entry.pl
+% Description: Predicates to generate and parse a static site's Markdown/HTML.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+:- include('helpers.pl').
+:- include('html.pl').
+:- include('markdown.pl').
+
+% parse_entry.
+% Read in an HTML file from stdin.
+parse_entry:-
+ read_file(user_input, HTML),
+ parse_html(HTML).
+
+% parse_entry(+Filename).
+% Read in an HTML file from Filename.
+parse_entry(Filename):-
+ open(Filename, read, Stream),
+ read_file(Stream, HTML),
+ close(Stream),
+ parse_html(HTML).
+
+
+% parse_html(+HTML).
+% Parse HTML into a Markdown file and write to stdout.
+parse_html(HTML):-
+ page(EntryCodes, Title, Subtitle, Date, HTML, []),
+ markdown(EntryCodes, Title, Subtitle, Date, MarkdownCodes, []),
+ atom_codes(Markdown, MarkdownCodes),
+ write(Markdown),
+ halt.
+
+
+% generate_entry.
+% Read in a Markdown file from stdin.
+generate_entry:-
+ read_file(user_input, Entry),
+ generate_html(Entry).
+
+% generate_entry(Filename).
+% Read in a Markdown file from Filename.
+generate_entry(Filename):-
+ open(Filename, read, Stream),
+ read_file(Stream, Entry),
+ close(Stream),
+ generate_html(Entry).
+
+
+% generate_html(Markdown).
+% Parse Markdown into an HTML file and write to stdout.
+generate_html(Markdown):-
+ markdown(EntryCodes, Title, Subtitle, Date, Markdown, []),
+ page(EntryCodes, Title, Subtitle, Date, HTMLCodes, []),
+ atom_codes(HTML, HTMLCodes),
+ write(HTML),
+ halt.
+\ No newline at end of file
diff --git a/readme.md b/readme.md
@@ -0,0 +1,54 @@
+# Tastic
+
+A static site generator in Prolog (mostly).
+
+## What is this?
+
+A few months ago I lost the source files I used to generate my static website. Fortunately there was no irreparable data loss because I still had the generated site up on my server. The problem was now I needed to write a script that would extract all the articles into source files again, and then reconfigure the site generator. Then I went, "Oh. This is a Prolog problem." I figured if I could write a Prolog program that described my HTML template then I could use the same code both to un-generate and re-generate the website, because a Prolog program is basically a set of rules and the logic can be run in either direction. (But then I love Prolog so every problem is a Prolog problem but I don't care. Fight me.)
+
+So the skinny is I wound up writing my own static website generator in Prolog. Well, the main components are in Prolog. I also wrote a bash script to make use of a bunch of common \*nix utilities (find, sed, grep, etc.) and to pipe output to some third-party programs where I needed them (HTML Tidy and it's still TBD, but possibly Pandoc in the future). Weirdest bit was that I just couldn't find anything decent to generate RSS feeds. I considered dropping the RSS altogether, but I've spent enough time haranguing people for not supporting interoperable standards that I didn't want to be a hypocrite. I wound up writing my own RSS generator too, also in Prolog.
+
+It's all reeeeeally alpha and is pretty closely tailored to my specific needs, but it works, and IMHO it works better than my old site generator which injected a bunch of nonsense into my HTML.
+
+## Dependencies
+
+* Bash. Used to run the script that automates everything else.
+* A Prolog interpreter. Tested with [SWI-Prolog](https://www.swi-prolog.org/), but the syntax should be vanilla ISO Prolog and should work with any implementation.
+* [HTML Tidy](http://www.html-tidy.org/). Used to format the HTML output nicely — not something I relished doing in Prolog.
+* [Smartypants](https://github.com/leohemsted/smartypants.py) located at ~/.local/bin/smartypants. Used to smarten the punctuation in the HTML output.
+
+## Assumptions
+
+The website folder used in the second argument is expected to contain three things:
+
+* a "source" folder containing the website's source;
+* an "output" folder containing the website's static output;
+* a "site.pl" file containing site-specific definitions.
+
+One or the other of the "source" and "output" folders must be populated, but not necessarily both.
+
+site.pl contains DCG definitions of this site's specifics, such as title, author, etc. An example site.pl file might look like this:
+
+ site_title --> "My website name".
+
+ site_subtitle --> "My website description/subtitle".
+
+ site_url --> "https://www.example.com".
+
+ email --> "webmaster@example.com".
+
+ name --> "Harold Gruntfuttock".
+
+## Use
+
+Generate a static website from Markdown sources:
+
+ ./tastic.sh generate /home/user/website
+
+Generate source files from a static website:
+
+ ./tastic.sh ungenerate /home/user/website
+
+## Still to do
+
+The source Markdown files are currently assumed to be plain HTML with a Markdown header containing metadata. I'm going to need something to convert proper Markdown to HTML, so I'll probably add Pandoc as a dependency to tastic.sh. I expect this will also replace Smartypants for doing smart punctuation.
+\ No newline at end of file
diff --git a/rss.pl b/rss.pl
@@ -1,50 +1,7 @@
-:- include('helpers.pl').
-:- include('markdown.pl').
-
-generate_rss(BuildDate, Filenames):-
- files_to_articles(Filenames, Articles),
- sort(Articles, SortedArticles),
- take_last(5, SortedArticles, TakenArticles),
- rss(BuildDate, TakenArticles, RSSCodes, []),
- atom_codes(RSS, RSSCodes),
- write(RSS),
- halt.
-
-files_to_articles([], []).
-
-files_to_articles([Filename|Filenames], [article(Date, Title, Link, Description)|Articles]):-
- open(Filename, read, Stream),
- read_file(Stream, Markdown),
- close(Stream),
- % Grab the link.
- get_link(Filename, Link),
- % Extract the title, entry, etc. from the Markdown.
- markdown(Entry, Title, _, Date, Markdown, []),
- % XML escape the description.
- replace("&", "&amp;", Entry, EntryAmp),
- replace("<", "&lt;", EntryAmp, EntryLT),
- replace(">", "&gt;", EntryLT, Description),
- files_to_articles(Filenames, Articles).
-
-get_link(Filename, Link):-
- atom_codes(Filename, FilenameCodes),
- % Just assert that this is an index file before we go further.
- % Backtracking after this point will take us down a rabbit hole.
- append(_, "index.md", FilenameCodes),
- site_url(URL, []),
- append(_, "/source", StartPath),
- append(StartPath, Path, FilenameCodes),
- append(PathWithoutFile, "index.md", Path),
- append(URL, PathWithoutFile, Link).
-
-get_link(Filename, Link):-
- atom_codes(Filename, FilenameCodes),
- site_url(URL, []),
- append(_, "/source", StartPath),
- append(StartPath, Path, FilenameCodes),
- append(PathWithoutExtension, ".md", Path),
- append(PathWithoutExtension, "/", PathWithSlash),
- append(URL, PathWithSlash, Link).
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% File: rss.pl
+% Description: DCG definition of an RSS file.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rss(BuildDate, Articles) -->
rss_open,
@@ -115,7 +72,7 @@ webmaster -->
last_build_date(BuildDate) -->
"<lastBuildDate>",
- BuildDate,
+ anything(BuildDate),
"</lastBuildDate>".
items([]) --> [].
@@ -146,7 +103,7 @@ author -->
pubdate(Date) -->
"<pubDate>",
- Date,
+ anything(Date),
"</pubDate>".
item_close --> "</item>".
diff --git a/tastic.sh b/tastic.sh
@@ -8,17 +8,17 @@ SITE_PATH=$2
if [ "$1" == "ungenerate" ]
then
# Create the directory structure.
- rm -rf $SITE_PATH/$SOURCE_DIR/*
- find $SITE_PATH/$OUTPUT_DIR -type d |
+ rm -rf "$SITE_PATH"/"$SOURCE_DIR"/*
+ find "$SITE_PATH"/"$OUTPUT_DIR" -type d |
sed "s|^$SITE_PATH/$OUTPUT_DIR|$SITE_PATH/$SOURCE_DIR|" |
xargs -0 -d '\n' mkdir -p --
# Parse and create all the markdown files.
- find $SITE_PATH/$OUTPUT_DIR -type f -name "*.html" -print0 |
+ find "$SITE_PATH"/"$OUTPUT_DIR" -type f -name "*.html" -print0 |
while IFS= read -r -d '' file; do
NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$OUTPUT_DIR|$SITE_PATH/$SOURCE_DIR|" | sed 's|.html$|.md|'`
cat "$file" |
- swipl --traditional -q -l entries.pl -g "consult('$SITE_PATH/site.pl'), parse_entry." |
+ swipl --traditional -q -l parse_entry.pl -g "consult('$SITE_PATH/site.pl'), parse_entry." |
# Unsmarten the punctuation.
sed "s|&nbsp;| |g" |
sed "s|‘|'|g" |
@@ -29,7 +29,7 @@ then
done
# Copy anything else directly.
- find $SITE_PATH/$OUTPUT_DIR -type f -not -name "*.html" -print0 |
+ find "$SITE_PATH"/"$OUTPUT_DIR" -type f -not -name "*.html" -print0 |
while IFS= read -r -d '' file; do
NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$OUTPUT_DIR|$SITE_PATH/$SOURCE_DIR|"`
cp "$file" "$NEW_PATH"
@@ -37,37 +37,37 @@ then
elif [ "$1" == "generate" ]
then
# Create the directory structure.
- rm -rf $SITE_PATH/$OUTPUT_DIR/*
- find $SITE_PATH/$SOURCE_DIR -type d |
+ rm -rf "$SITE_PATH"/"$OUTPUT_DIR"/*
+ find "$SITE_PATH"/"$SOURCE_DIR" -type d |
sed "s|^$SITE_PATH/$SOURCE_DIR|$SITE_PATH/$OUTPUT_DIR|" |
xargs -0 -d '\n' mkdir -p --
# Parse and create all the HTML files.
- find $SITE_PATH/$SOURCE_DIR -type f -name "*.md" -print0 |
+ find "$SITE_PATH"/"$SOURCE_DIR" -type f -name "*.md" -print0 |
while IFS= read -r -d '' file; do
echo $file
NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$SOURCE_DIR|$SITE_PATH/$OUTPUT_DIR|" | sed 's|.md$|.html|'`
cat "$file" |
- swipl --traditional -q -l entries.pl -g "consult('$SITE_PATH/site.pl'), generate_entry." |
+ swipl --traditional -q -l parse_entry.pl -g "consult('$SITE_PATH/site.pl'), generate_entry." |
tidy -quiet --indent auto --indent-with-tabs yes --wrap 0 -asxml --tidy-mark no |
~/.local/bin/smartypants \
> "$NEW_PATH"
done
# Copy anything else directly.
- find $SITE_PATH/$SOURCE_DIR -type f -not -name "*.md" -print0 |
+ find "$SITE_PATH"/"$SOURCE_DIR" -type f -not -name "*.md" -print0 |
while IFS= read -r -d '' file; do
NEW_PATH=`echo "$file" | sed "s|^$SITE_PATH/$SOURCE_DIR|$SITE_PATH/$OUTPUT_DIR|"`
cp "$file" "$NEW_PATH"
done
# Generate the RSS feed.
- mkdir -p $SITE_PATH/$OUTPUT_DIR/feeds
- ARTICLES=`grep -Rl --include=\*.md "^Date: " $SITE_PATH/$SOURCE_DIR | paste -sd ',' - | sed "s|,|','|g"`
+ mkdir -p "$SITE_PATH"/"$OUTPUT_DIR"/feeds
+ ARTICLES=`grep -Rl --include=\*.md "^Date: " "$SITE_PATH"/"$SOURCE_DIR" | paste -sd ',' - | sed "s|,|','|g"`
BUILD_DATE=`date +"%Y-%m-%d %T"`
- swipl --traditional -q -l rss.pl -g "consult('$SITE_PATH/site.pl'), generate_rss(\"$BUILD_DATE\", ['$ARTICLES'])." |
+ swipl --traditional -q -l generate_rss.pl -g "consult('$SITE_PATH/site.pl'), generate_rss(\"$BUILD_DATE\", ['$ARTICLES'])." |
tidy -quiet --indent auto --indent-with-tabs yes --wrap 0 -xml --tidy-mark no \
- > $SITE_PATH/$OUTPUT_DIR/feeds/rss.xml
+ > "$SITE_PATH"/"$OUTPUT_DIR"/feeds/rss.xml
else
echo "Invalid argument."
exit 1