build_rss.sh 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  #######################################
  # Print the publish date embedded in an HTML page.
  #
  # A line qualifies only if it contains all of: `<meta data-date="`,
  # `&lt;`, `&gt;` and `" />`. The first occurrence of each token is
  # removed and the remainder is trimmed of surrounding whitespace, so
  #   <meta data-date="&lt;2020-01-02 Thu&gt;" />
  # yields `2020-01-02 Thu`.
  #
  # Only the FIRST qualifying line is reported so the result is always a
  # single line (callers embed it in one-line records).
  #
  # Arguments: $1 - path to the HTML file (required)
  # Outputs:   the date on stdout; nothing when no line qualifies
  # Returns:   sed's exit status; aborts via ${1:?} when $1 is missing
  #######################################
  get_publish_date_for_rss_feed() {
    local html_file=${1:?}

    # One sed process: each `/re/!d` drops lines missing a required token
    # (mirroring the original chain of `sed -n 's/re//p'` filters), then
    # the token is stripped. `p` + `q` print the first hit and stop reading.
    sed -n \
      -e '/<meta data-date="/!d' \
      -e 's/<meta data-date="//' \
      -e '/&lt;/!d' \
      -e 's/&lt;//' \
      -e '/&gt;/!d' \
      -e 's/&gt;//' \
      -e '/" \/>/!d' \
      -e 's/" \/>//' \
      -e 's/^[[:space:]]*//' \
      -e 's/[[:space:]]*$//' \
      -e 'p' \
      -e 'q' \
      -- "$html_file"
  }
  #######################################
  # Print the text of a single-line <title> element from an HTML page.
  #
  # A line qualifies only if it contains both `<title>` and `</title>`;
  # the first occurrence of each tag is removed and the remainder is
  # trimmed of surrounding whitespace. Titles whose opening and closing
  # tags sit on different lines are not matched.
  #
  # Only the FIRST qualifying line is reported so the result is always a
  # single line.
  #
  # Arguments: $1 - path to the HTML file (required)
  # Outputs:   the title on stdout; nothing when no line qualifies
  # Returns:   sed's exit status; aborts via ${1:?} when $1 is missing
  #######################################
  get_html_element_title() {
    local html_file=${1:?}

    # `/re/!d` skips lines lacking a tag; `p` + `q` emit the first hit
    # and stop reading the file.
    sed -n \
      -e '/<title>/!d' \
      -e 's/<title>//' \
      -e '/<\/title>/!d' \
      -e 's/<\/title>//' \
      -e 's/^[[:space:]]*//' \
      -e 's/[[:space:]]*$//' \
      -e 'p' \
      -e 'q' \
      -- "$html_file"
  }
  #######################################
  # Print the content of an HTML page's meta description tag.
  #
  # A line qualifies only if it contains both
  # `<meta name="description" content="` and `" />`; the first occurrence
  # of each is removed and the remainder is trimmed of surrounding
  # whitespace.
  #
  # Only the FIRST qualifying line is reported so the result is always a
  # single line.
  #
  # Arguments: $1 - path to the HTML file (required)
  # Outputs:   the description on stdout; nothing when no line qualifies
  # Returns:   sed's exit status; aborts via ${1:?} when $1 is missing
  #######################################
  get_html_element_meta_description() {
    local html_file=${1:?}

    # `/re/!d` skips lines lacking a required token; `p` + `q` emit the
    # first hit and stop reading the file.
    sed -n \
      -e '/<meta name="description" content="/!d' \
      -e 's/<meta name="description" content="//' \
      -e '/" \/>/!d' \
      -e 's/" \/>//' \
      -e 's/^[[:space:]]*//' \
      -e 's/[[:space:]]*$//' \
      -e 'p' \
      -e 'q' \
      -- "$html_file"
  }
  #######################################
  # Turn a file path into a feed link by replacing the first occurrence
  # of the input directory with the site's absolute path.
  #
  # The directory is matched LITERALLY: glob characters (`*`, `?`, `[`)
  # in it are not treated as a pattern, and `&` in the replacement is
  # copied verbatim (bash 5.2's patsub_replacement would otherwise expand
  # it inside `${var/pat/rep}`).
  #
  # Arguments: $1 - path of the current file (required)
  #            $2 - input directory to replace (required)
  #            $3 - absolute path / base URL to substitute (required)
  # Outputs:   the rewritten path on stdout; $1 unchanged if $2 is absent
  #######################################
  get_link_for_rss_feed() {
    local current=${1:?}
    local input_directory=${2:?}
    local absolute_path=${3:?}

    # Nothing to replace: echo the path back untouched.
    if [[ $current != *"$input_directory"* ]]; then
      printf '%s\n' "$current"
      return 0
    fi

    # Text before the first occurrence (longest `dir*` suffix removed),
    # then everything after that occurrence, located by length.
    local before=${current%%"$input_directory"*}
    local -i skip=$(( ${#before} + ${#input_directory} ))
    local after=${current:skip}

    printf '%s\n' "${before}${absolute_path}${after}"
  }
  #######################################
  # Build an RSS 2.0 feed from the *.html files under a directory.
  #
  # For every HTML file found, the publish date, title, description and
  # link are obtained from get_publish_date_for_rss_feed,
  # get_html_element_title, get_html_element_meta_description and
  # get_link_for_rss_feed. Pages without a publish date are skipped. The
  # remaining entries are sorted in reverse order (the date is the leading
  # sort key) and at most `max_entries` of them are written as <item>s.
  #
  # Arguments: $1 - input directory to scan (required)
  #            $2 - absolute path / base URL used for item links (required)
  #            $3 - output feed file; parent directories are created and
  #                 an existing file is replaced (required)
  #            $4 - site name for the channel <title> (required)
  #            $5 - maximum number of items (default 10; values that are
  #                 not integers >= 1 fall back to 1)
  # Outputs:   status messages on stdout; the feed in $3
  #######################################
  build_rss_main() {
    local input_directory=${1:?}
    local absolute_path=${2:?}
    local output_file=${3:?}
    local site_name=${4:?}
    local max_entries=${5:-10}

    # ASCII unit separator as the field delimiter: unlike '|', it does not
    # occur in titles or descriptions, and being non-whitespace it keeps
    # empty fields when the record is split again.
    local -r sep=$'\x1f'
    local build_date page record pub_date title description link
    local -a records=() sorted=() fields=()

    if [[ ! $max_entries =~ ^-?[0-9]+$ ]] || [ "$max_entries" -lt 1 ]; then
      echo "Must have at least one entry, defaulting to 1."
      max_entries=1
    fi
    # Force base 10 so zero-padded input such as "08" is not parsed as
    # (invalid) octal by the arithmetic below.
    max_entries=$((10#${max_entries}))

    if [ -f "$output_file" ]; then
      rm -- "$output_file"
    else
      echo "Create new rss feed."
    fi
    mkdir -p "$(dirname "${output_file}")"

    # Collect one record per dated page. find's output arrives on fd 3 so
    # the helpers cannot consume it through stdin. IFS is only set for the
    # `read` command itself; the caller's IFS is left untouched.
    while IFS= read -r -d '' -u 3 page; do
      pub_date=$(get_publish_date_for_rss_feed "$page")
      title=$(get_html_element_title "$page")
      description=$(get_html_element_meta_description "$page")
      link=$(get_link_for_rss_feed "$page" "${input_directory}" "${absolute_path}")

      # Records are newline-delimited for sort, so keep each field to its
      # first line.
      pub_date=${pub_date%%$'\n'*}
      title=${title%%$'\n'*}
      description=${description%%$'\n'*}
      link=${link%%$'\n'*}

      # Pages without a publish date never make it into the feed.
      [[ -n $pub_date ]] || continue
      records+=("${pub_date}${sep}${title}${sep}${description}${sep}${link}")
    done 3< <(find "${input_directory}" -type f -name "*.html" -print0)

    # TODO: Should probably go full bash or posix (readarray is bash 4+).
    if (( ${#records[@]} > 0 )); then
      readarray -t sorted < <(printf '%s\n' "${records[@]}" | sort -r)
    fi

    # RSS 2.0 dates follow RFC 822; plain `date` output is locale dependent.
    build_date=$(LC_ALL=C date -u '+%a, %d %b %Y %H:%M:%S +0000')

    {
      echo "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
      echo "<rss version=\"2.0\">"
      echo " <channel>"
      echo " <description/>"
      echo " <lastBuildDate>${build_date}</lastBuildDate>"
      echo " <link>/</link>"
      printf '%s\n' " <title>${site_name}</title>"

      if (( ${#sorted[@]} > 0 )); then
        for record in "${sorted[@]}"; do
          (( max_entries > 0 )) || break
          IFS="$sep" read -r -a fields <<< "$record"
          # Defensive: every stored record has a date, but never emit an
          # item without one.
          [[ -n ${fields[0]-} ]] || continue
          max_entries=$((max_entries - 1))
          printf '%s\n' \
            " <item>" \
            " <pubDate>${fields[0]-}</pubDate>" \
            " <description>${fields[2]-}</description>" \
            " <link>${fields[3]-}</link>" \
            " <title>${fields[1]-}</title>" \
            " </item>"
        done
      fi

      echo " </channel>"
      echo "</rss>"
    } > "${output_file}"
  }