squeeze.sh (4214B)
#!/usr/bin/env sh

# Generate a static website.

# Usage: squeeze.sh [-f|--force] site_path

force=0

# Loop through all the arguments and set flags/options.
while [ "$#" -gt 0 ] ; do
    case "$1" in
        -f|--force)
            force=1
            shift
            ;;
        *)
            site_path="$1"
            shift
            ;;
    esac
done

output_path="$site_path/output"
source_path="$site_path/source"
feed_path="$output_path/feeds/rss.xml"

# A space-separated list of all the process IDs we've started.
proc_ids=""
# Max number of processes to run at once.
# There is no way to do `nproc` with only POSIX tools,
# so the best way to make this portable is with fallbacks.
# `nproc` itself isn't even universal on Linux, so the safest
# place to get the number of processors on Linux is /proc/cpuinfo.
max_processes="$(
    grep -c ^processor /proc/cpuinfo ||
    sysctl -n hw.ncpu 2>/dev/null ||
    getconf _NPROCESSORS_ONLN 2>/dev/null
)"

# Regenerate everything if the force flag has been used or there is
# no RSS file, but otherwise only regenerate Markdown files that have
# changed since the RSS feed was updated.
rsync_exclude=
find_test=
[ "$force" -eq 0 ] &&
    [ -f "$feed_path" ] &&
    # Don't delete already generated HTML files.
    rsync_exclude="--exclude *.html" &&
    # Only find Markdown files newer than the RSS feed.
    find_test="-newer $feed_path" &&
    # Find and delete any HTML files for which a source Markdown
    # no longer exists.
    find "$output_path" -type f -name "*.html" |
        sed "s|$output_path/||" |
        while IFS= read -r file ; do
            [ ! -f "$source_path/${file%.html}.md" ] &&
                echo "deleting orphaned $file" &&
                rm "$output_path/$file"
        done

# Copy everything that's not Markdown.
# This will also create the folder structure for the destination Markdown files.
rsync --archive --delete --verbose \
    --exclude "*.md" --exclude "feeds" $rsync_exclude \
    "$source_path/" "$output_path/"

# Parse and create all the HTML files.
markdown_files="$(find "$source_path" -type f -name "*.md" $find_test)"
line_count="$(echo "$markdown_files" | wc -l | tr -d -c '[:digit:]')"
index=0

echo "$markdown_files" |
    sed "s|$source_path/||" |
    while IFS= read -r file ; do
        echo "$file"
        index="$(expr "$index" + 1)"

        # Determine if this file has any metadata at the start.
        # Metadata are in the format Key: value, so it's easy to detect.
        head -n 1 "$source_path/$file" | grep -q "^[A-Za-z]*: " &&
            headers=1 ||
            headers=0

        # Get everything after the metadata.
        ([ "$headers" -eq 1 ] && sed '1,/^$/d' || cat) < "$source_path/$file" |
            # Convert Markdown to HTML.
            markdown_py --extension footnotes --extension md_in_html --extension smarty --quiet --output_format xhtml |
            # Recombine with the metadata and hand it to Prolog.
            ([ "$headers" -eq 1 ] && sed '/^$/q' "$source_path/$file" ; cat) |
            swipl --traditional --quiet -l parse_entry.pl -g "consult('$site_path/site.pl'), generate_entry." |
            # Unwrap block-level elements that have erroneously been wrapped in <p> tags.
            sed 's|<p><details|<details|g' |
            sed 's|</summary></p>|</summary>|g' |
            sed 's|<p></details></p>|</details>|g' |
            sed 's|<p><figure|<figure|g' |
            sed 's|</figure></p>|</figure>|g' |
            # Smarten punctuation.
            smartypants \
            > "$output_path/${file%.md}.html" &

        if [ "$index" -eq "$line_count" ] ; then
            # Wait until all jobs have completed.
            wait
        else
            # Add the most recent process ID to the list.
            proc_ids="$! $proc_ids"
            # Pause while the number of created processes is greater than
            # or equal to the max processes.
            while [ "$(ps -p "${proc_ids%% }" | tail -n +2 | wc -l | tr -d -c '[:digit:]')" -ge "$max_processes" ] ; do
                true
            done
        fi
    done

# Generate the RSS feed.
mkdir -p "${feed_path%/*}"
# Grep the date of each article.
find "$output_path" -type f -name "*.html" \
    -exec grep "id=\"article-date\"" {} + |
    # Sort articles by date (skipping the first field).
    sort -k 2 |
    # Get the last (i.e. most recent) posts for the RSS feed.
    tail -5 |
    # Reformat to just the file names.
    cut -f 1 -d : |
    # Parse the articles and generate the RSS.
    swipl --traditional --quiet -l generate_rss.pl -g "consult('$site_path/site.pl'), generate_rss(\"$(date '+%a, %d %b %Y %T %Z')\")." \
    > "$feed_path"