get_location_id 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. #!/bin/bash
  # ---- Default settings; the option parser below may override them ----
  debug=0        # 1 = diagnostics on stderr are kept (-d)
  me="${0##*/}"  # script name without its directory, shown in the help text
  useragent="Mozilla/5.0 (Windows NT 10.0; rv:124.0) Gecko/20100101 Firefox/124.0"
  file=""        # local HTML file to parse instead of querying the site (-F)
  urlbase="https://www.geonames.org/"
  # perl is listed as well: xpath() pipes its output through perl, so the
  # script cannot work without it.
  deps=( curl xmllint perl )
  max=10         # maximum number of results (-m)
  only=0         # 1 = print only the numerical id (-o)
  nonl=0 # omit final newline if set to 1
  # Set explicitly so that a variable named "fuzzy" inherited from the
  # environment cannot switch fuzzy search on by accident (-f sets it to 1).
  fuzzy=0
  sep='\n'       # result separator (-s)
  # Print an optional message followed by the help text, then exit with
  # status 1.
  # Arguments: $* - optional message printed before the help text
  # Reads:     me, urlbase, deps, max, useragent, sep
  # Outputs:   message and help text on stdout
  usage() {
    # printf rather than echo, so a message such as "-n" is printed verbatim.
    if [[ -n "$*" ]]; then
      printf '%s\n' "$*"
    fi
    # Each option is listed exactly once; -h is accepted by the option
    # parser and therefore documented here as well.
    cat <<EOF
Enter your query to get a location ID required to retrieve weather data.
Queries $urlbase
Dependencies: ${deps[@]}
Usage: $me [options] [free form location string]
Options:
-m int Maximum number of results (default $max)
-U str Override user agent. Default:
$useragent
-o only output numerical id. Combine with "-m1" for usage in scripts.
-d Debugging output
-f Fuzzy search on
-F str Parse local file. Invalidates -U option and query.
Mostly for debugging.
-N Omit final newline
-s str Result separator. Default: $sep
-h Show this help
EOF
    exit 1
  }
  # Evaluate an XPath expression against an HTML document.
  # Arguments: $1 - XPath expression
  #            $2 - HTML document text
  # Outputs:   xmllint's result for the expression on stdout, with HTML
  #            entities decoded by perl's HTML::Entities
  xpath() {
    # printf rather than echo: the document is arbitrary text and must be
    # passed on verbatim, whatever it starts with.
    # xmllint creates loads of error messages even when it's working correctly
    # if you really want to see these messages, remove '2>/dev/null'
    printf '%s\n' "$2" \
      | xmllint --html --noblanks --nowrap --recover --nonet --xpath "$1" - 2>/dev/null \
      | perl -CS -MHTML::Entities -pe 'decode_entities($_);'
  }
  # simple dependency check: every entry of deps must be resolvable by
  # "type -f" (which ignores shell functions)
  for dep in "${deps[@]}"; do
    # Quoted so an odd entry cannot be word-split; the message tells the
    # user which tool is missing instead of only showing the help text.
    type -f "$dep" >/dev/null 2>&1 || usage "Missing dependency: $dep"
  done
  # xpath() loads the HTML::Entities module; if it cannot be loaded, perl
  # prints its own error and the script stops here.
  perl -MHTML::Entities -e 1 || exit 1
  # Parse the command-line options into the global settings.
  # Arguments: the script's arguments ("$@")
  # Writes:    max, file, useragent, fuzzy, debug, only, nonl, sep, OPTIND
  # An invalid option or option value ends the script through usage().
  parse_options() {
    local opt
    OPTIND=1
    while getopts "fF:U:hdm:os:N" opt; do
      case "$opt" in
        m)
          # The pattern is anchored: the whole argument must consist of
          # digits, otherwise arbitrary text would reach the arithmetic
          # test. 10# forces base 10 so that values such as 08 are not
          # rejected as invalid octal numbers.
          if [[ "$OPTARG" =~ ^[0-9]+$ ]] && (( 10#$OPTARG > 0 )); then
            max=$(( 10#$OPTARG ))
          else
            usage "Option -${opt}: invalid number $OPTARG"
          fi
          ;;
        F)
          if [[ -r "$OPTARG" ]]; then
            file="$OPTARG"
          else
            usage "cannot read $OPTARG"
          fi
          ;;
        U) useragent="$OPTARG" ;;
        f) fuzzy=1 ;;
        d) debug=1 ;;
        o) only=1 ;;
        N) nonl=1 ;;
        s) sep="$OPTARG" ;;
        # -h and anything getopts does not recognise show the help text
        *) usage ;;
      esac
    done
  }
  parse_options "$@"
  # Drop the parsed options; what remains is the free-form query.
  shift $((OPTIND-1))
  # Everything left on the command line is the free-form location query.
  query="$*"
  # Diagnostics below are written to stderr; unless debugging is on, stderr
  # is discarded for the rest of the script.
  [[ "$debug" == 0 ]] && exec 2>/dev/null
  echo "useragent: $useragent" >&2
  echo "debug: $debug" >&2
  # A trailing slash on urlbase is stripped first, so the request URL never
  # contains "//search.html".
  if [[ "$fuzzy" == 1 ]]; then
    urlbase="${urlbase%/}/search.html?fuzzy=0.6"
  else
    urlbase="${urlbase%/}/search.html"
  fi
  if [[ "$file" == "" ]]; then
    # cannot use read -p "..." because the prompt is output to stderr.
    if [[ "$query" == "" ]]; then
      printf '%s' "Please enter a free form search for your location: "
      read -r query
    fi
    # Stop when curl itself fails instead of carrying on with an empty page.
    html="$(curl --data-urlencode "q=$query" --user-agent "$useragent" "$urlbase")" || exit 1
  else
    # -F was given: parse the local file, no request is made.
    html="$(<"$file")"
  fi
  echo "location: $query" >&2
  # Print one record per result row found in the search page.
  # Reads:   html, max, sep, only, nonl; calls xpath()
  # Outputs: records on stdout with $sep between them (backslash escapes in
  #          $sep are interpreted); a final newline unless nonl is 1
  # Returns: 0
  print_results() {
    local row cell expr id i printed=0
    local -a table
    # Rows are scanned from table row 3 onwards.
    # NOTE(review): rows 1-2 are presumably header rows - confirm.
    for (( row = 3; row < max + 3; row++ )); do
      cell="//table[@class=\"restable\"]//tr[$row]"
      # without lat/lon info
      expr="$cell/td[1]/a/@href | $cell/td[2]/a/text() | $cell/td[3]/text() | $cell/td[3]/small/text()"
      mapfile -t table <<<"$(xpath "$expr" "$html")"
      # table[0] is expected to hold the td[1] link; the id is the text
      # between its first and its last slash.
      id="${table[0]}"
      id="${id#*/}"
      id="${id%/*}"
      [[ -n "$id" ]] || continue
      # The separator goes between records that were actually printed, so an
      # empty first row cannot produce a leading separator. %b keeps escapes
      # such as \n working while a "%" in the separator stays literal.
      if (( printed > 0 )); then
        printf '%b' "$sep"
      fi
      printed=$(( printed + 1 ))
      if [[ "$only" == 1 ]]; then
        # Just the id: no padding and no extra newline, so the separator and
        # the final-newline setting apply here exactly as in the full output.
        printf '%s' "$id"
        continue
      fi
      printf '%s ' "$id"
      # Remaining lines are the text cells; a leading ", " is dropped.
      for (( i = 1; i < ${#table[@]}; i++ )); do
        printf '%s ' "${table[i]#, }"
      done
    done
    # An if statement (not "&&") so that omitting the newline does not leave
    # a non-zero exit status behind.
    if (( nonl == 0 )); then
      echo
    fi
  }
  print_results