podcast-json-to-rdf.sh 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. #!/bin/sh
  2. cd "$(dirname "$0")/../htdocs/stations"
  3. #
  4. # time sudo -u radio-pi sh bin/podcast-json-to-rdf.sh
  5. #
  6. xmllint --version 2>/dev/null || { echo "Please install xmllint" ; exit 1 ; }
  7. curl --version >/dev/null || { echo "Please install curl" ; exit 1 ; }
  8. cwd="$(pwd)"
  9. dst="podcasts.rdf"
  10. OSTYPE="$(uname)"
  11. # Different sed version for different os types...
  12. _sed() {
  13. # https://github.com/lukas2511/letsencrypt.sh/blob/master/letsencrypt.sh
  14. if [ "Linux" = "${OSTYPE}" ] ; then
  15. sed -r "${@}"
  16. else
  17. sed -E "${@}"
  18. fi
  19. }
  20. urlencode() {
  21. # http://stackoverflow.com/a/10797966
  22. echo -n "${1}" | curl -Gso /dev/null -w %{url_effective} --data-urlencode @- "" | cut -c 3-
  23. }
  24. ls */????/??/??/????\ *.json | cut -d / -f 1-4 | sort | uniq | while read dir
  25. do
  26. # limit parallelism? http://stackoverflow.com/a/6513254
  27. {
  28. cd "${cwd}" && cd "${dir}"
  29. [ -r "${dst}" ] && { echo "${dir}/${dst} ... skipping" 1>&2 ; continue ; }
  30. {
  31. echo "${dir}" 1>&2
  32. echo "<?xml version='1.0' encoding='utf-8'?><rdf:RDF xmlns:dct='http://purl.org/dc/terms/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>"
  33. for jso in *.json
  34. do
  35. broadcast="$(basename "${jso}" .json)"
  36. podcast="$(_sed 's/.+"name":"([^"]+)".+/\1/' "${jso}")"
  37. broadcast_url="$(urlencode "${broadcast}")"
  38. podcast_url="$(urlencode "${podcast}")"
  39. echo "<rdf:Description rdf:about='${broadcast_url}'><dct:isPartOf rdf:resource='../../../../../podcasts/${podcast_url}/'/></rdf:Description>"
  40. done
  41. echo "</rdf:RDF>"
  42. } > "${dst}"
  43. xmllint --noout "${dst}" || { echo "${dir}/${dst} broken" 1>&2 ; rm "${dst}" ; continue ; }
  44. } # & don't do parallel.
  45. done
  46. wait
  47. cd "${cwd}"
  48. ls */????/??/??/podcasts.rdf | wc -l