#!/bin/bash
# Copyright 2014 Michał Masłowski <mtjm@mtjm.eu>
# Copyright 2019,2020,2023 bill-auger <bill-auger@programmer.net>
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved. This file is offered as-is,
# without any warranty.
# blacklist::check.sh Verify the blacklist entries are correctly formatted.
  # Data files processed by this script; the names are resolved relative to
  # the current working directory.
  readonly BLACKLISTS=(
    aur-blacklist.txt
    blacklist.txt
    your-gaming-freedom-blacklist.txt
    your-init-freedom-blacklist.txt
    your-privacy-blacklist.txt
  )

  # Matches an entry whose third ':'-separated field is empty
  # (reported later under "[citation needed]").
  readonly REF_REGEX='^[^:]*:[^:]*::[^:]*:.*$'

  # Matches a well-formed entry: at least five ':'-separated fields, where the
  # third field is either empty or one of the listed keywords.
  readonly SYNTAX_REGEX='^[^:]*:[^:]*:(debian|fedora|fsf|parabola|savannah)?:[^:]*:.*$'

  # Field separator used in the data files.
  readonly CSV_CHAR=':'

  # Temporary stand-in for CSV_CHAR while sorting.
  readonly SEP_CHAR='!'

  # Each run starts with a fresh log.
  readonly LOG_FILE=./check.log
  rm -f -- "${LOG_FILE}"

  # Becomes 1 as soon as any check reports a problem; used as the exit code.
  exit_status=0
  # Sort and clean every data file in place: sort, drop duplicate lines, and
  # delete blank lines.
  #
  # TODO: the best sorting results are achieved when the field separator
  #       ($CSV_CHAR) precedes any valid package name character in ASCII order,
  #       the lowest of which is ASCII 43 '+', and spaces are not allowed;
  #       so ASCII 33 ('!') serves this purpose quite well.
  #       Someday, we should re-write the tools to parse on '!' instead of ':'.
  #       If that were done, then the `sort` command alone would yield
  #       the same results as this procedure, except for removing empty lines.
  # NOTE(review): `sort` collates according to the current locale, so the
  #       ASCII-order reasoning above presumably holds only under LC_ALL=C -
  #       confirm before relying on it.
  #
  # The loop runs in the current shell (not in a command substitution) so that
  # the assignment to exit_status survives the loop.
  unsortable=''
  for blacklist in "${BLACKLISTS[@]}"; do
    printf "sorting and cleaning: '%s' ... " "${blacklist}" >> "${LOG_FILE}"

    # Without this guard a missing or unreadable file makes grep fail, which
    # would be mistaken for "no match", and the pipeline below would then
    # create (or replace) the data file as an empty file.
    if [[ ! -f "${blacklist}" || ! -r "${blacklist}" ]]; then
      echo "ERROR: can not read file" >> "${LOG_FILE}"
      exit_status=1
      continue
    fi

    if offending="$(grep -F -- "${SEP_CHAR}" "${blacklist}")"; then
      # The file already contains the stand-in character, so swapping the
      # separators back and forth would corrupt it; collect the offending lines.
      echo "ERROR: can not sort - contains '${SEP_CHAR}' char" >> "${LOG_FILE}"
      unsortable+="${unsortable:+$'\n'}${offending}"
    else
      echo "OK" >> "${LOG_FILE}"
      # Swap ':' for '!' so the separator sorts ahead of name characters,
      # then restore ':' once the lines are ordered and cleaned.
      tr "${CSV_CHAR}" "${SEP_CHAR}" < "${blacklist}" \
        | sort \
        | uniq \
        | sed '/^[[:space:]]*$/d' \
        | tr "${SEP_CHAR}" "${CSV_CHAR}" > "${blacklist}.temp" \
        && mv -- "${blacklist}.temp" "${blacklist}"
    fi
  done

  # The remaining checks assume cleaned files, so stop here if any file could
  # not be sorted.
  if [[ -n "${unsortable}" ]]; then
    printf "\n[Entries containing '%s' char]:\n%s\n\n" "${SEP_CHAR}" "${unsortable}" >> "${LOG_FILE}"
    echo -n "ERROR: one of the data files is unsortable - check can not continue"
    echo " - correct the malformed entries, then run this script again"
    exit 1
  fi
  # Collect every line, from any data file, that does not match SYNTAX_REGEX.
  # Because several files are passed, grep prefixes each hit with its file name.
  # The pattern is quoted: it is full of glob characters ('[', '*', '?') and
  # must reach grep verbatim rather than undergo pathname expansion.
  printf "\n\nchecking for entries with syntax errors: ... " >> "${LOG_FILE}"
  invalid="$(grep -E -v -- "${SYNTAX_REGEX}" "${BLACKLISTS[@]}")"
  if [[ -z "${invalid}" ]]; then
    printf "OK\n" >> "${LOG_FILE}"
  else
    printf "\n[Incorrectly formatted entries]:\n%s\n\n" "${invalid}" >> "${LOG_FILE}"
    exit_status=1
  fi
  # Collect every line, from any data file, that matches REF_REGEX, i.e. whose
  # third ':'-separated field is empty. Because several files are passed, grep
  # prefixes each hit with its file name.
  # The pattern is quoted: it is full of glob characters ('[', '*') and must
  # reach grep verbatim rather than undergo pathname expansion.
  printf "\n\nchecking for entries without reference to detailed description: ... " >> "${LOG_FILE}"
  unsourced="$(grep -E -- "${REF_REGEX}" "${BLACKLISTS[@]}")"
  if [[ -z "${unsourced}" ]]; then
    printf "OK\n" >> "${LOG_FILE}"
  else
    printf "\n[citation needed]:\n%s\n\n" "${unsourced}" >> "${LOG_FILE}"
    exit_status=1
  fi
  # summary
  # Print the line counts of the data files, the number of problems found by
  # the checks above, and exit with the accumulated status.

  # `wc -l` output for all data files, each output line prefixed with a tab.
  totals="$(wc -l "${BLACKLISTS[@]}" | sed 's|\(.*\)|\t\1|')"

  # Count the reported lines. The here-strings are quoted so the embedded
  # newlines are kept on every bash version (an unquoted here-string was
  # word-split by older releases, which collapses the count to 1).
  n_unsourced=0
  n_malformed=0
  if [[ -n "${unsourced}" ]]; then
    n_unsourced="$(wc -l <<< "${unsourced}")"
  fi
  if [[ -n "${invalid}" ]]; then
    n_malformed="$(wc -l <<< "${invalid}")"
  fi

  printf 'summary:\n\t* number of entries total:\n%s\n' "${totals}"
  if (( n_malformed )); then
    printf '\t* number of entries improperly formatted: %s\n' "${n_malformed}"
  fi
  if (( n_unsourced )); then
    printf '\t* number of entries needing citation: %s\n' "${n_unsourced}"
  fi
  if (( exit_status )); then
    echo "refer to the file: '${LOG_FILE}' for details"
  fi

  exit "${exit_status}"