DDGSearch 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. #!/bin/bash
  2. #
  3. ############################
  4. # #
  5. # DuckDuckGo Search Script #
  6. # #
  7. ############################
  8. #
  9. # A simple script that takes a textual argument, searches DuckDuckGo, and outputs
  10. # a series of links in a text document as DDGresults.txt, and to the command line
  11. #
  12. # Inititally created as a component of Category's "Chaffer", inspired by the book
  13. # "Little Brother" by Cory Doctorow - currently a WIP
  14. #
  15. # Store the provided argument in variable SEARCH
  16. SEARCH=$1
  17. # Replace any spaces with "+" characters
  18. SEARCH=${SEARCH// /+}
  19. # Use html version of DuckDuckGo to get results of the search in html format
  20. wget -O temp.html -q https://duckduckgo.com/html/?q=$SEARCH
  21. # Restrict to lines with a href link (generally most <a> tags)
  22. cat temp.html | grep href >> temp.html
  23. # Replace broken characters (":", "/", "-") with sed
  24. sed -i -e 's/%3A/:/g' temp.html
  25. sed -i -e 's#%2F#/#g' temp.html
  26. sed -i -e 's/%2D/-/g' temp.html
  27. # Using '#' as a delimiter for %2F, as replacing some with forward slashes.
  28. # Looks ugly, but works.
  29. # Remove DDG /l/.. shite
  30. sed -i -e 's#/l/?kh=-1&amp;uddg=##g' temp.html
  31. # Output just the links between href and double-quote, and strip tracking crap
  32. cat temp.html | grep -Po '(?<=href=")[^"]*' | grep http > tmplinks.txt
  33. # Strip duplicates from tmplinks.txt, then record any non-DuckDuckGo links
  34. sort -u tmplinks.txt | grep -vi duckduckgo | grep -v duck.co > DDGresults.txt
  35. # Remove temporary files
  36. rm temp.html
  37. rm tmplinks.txt
  38. # Output list of links
  39. cat DDGresults.txt