chapter-parser.sh 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. #!/bin/sh
  2. # Copyright (C) 2020 user18130814200115-2
  3. # Copyright (C) 2021 Kevin "The Nuclear" Bloom <nuclearkev@dragora.org>
  4. #
  5. # This file is part of libre-bible (LB).
  6. #
  7. # LB is free software: you can redistribute it and/or modify
  8. # it under the terms of the 2-Clause BSD License.
  9. #
  10. # LB is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # 2-Clause BSD License for more details.
  14. #
  15. # You should have received a copy of the 2-Clause BSD License
  16. # along with LB. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
  17. echo "[" > output.txt
  18. for verse in $(seq 1 1 51)
  19. do
  20. if [ $verse -lt 10 ]
  21. then
  22. num="0${verse}"
  23. else
  24. num=${verse}
  25. fi
  26. file_name=SIR${num}.htm
  27. wget https://ebible.org/eng-kjv/${file_name}
  28. echo "[" >> output.txt
  29. html2text -width 1000 ${file_name} | while read line
  30. do
  31. if [[ ${line:0:1} =~ ^[0-9]+$ ]]
  32. then
  33. IFS=' ' read -raarr <<< "${line}"
  34. if ! [[ ${arr[1]} = "" ]]
  35. then
  36. echo "\"${arr[1]}\"," >> output.txt
  37. fi
  38. fi
  39. done
  40. echo "\"END OF CHAPTER\"]," >> output.txt
  41. done
  42. echo "]" >> output.txt
  43. tr -d '\n' < output.txt > output.txt
  44. rm *.htm