# startup.py (1.2 KB)

  1. #!/usr/bin/python3
  2. print("Checking for nltk")
  3. try:
  4. import nltk
  5. except ImportError:
  6. print("You should install nltk before continuing")
  7. print("Checking for numpy")
  8. try:
  9. import numpy
  10. except ImportError:
  11. print("You should install numpy before continuing")
  12. print("Checking for scipy")
  13. try:
  14. import scipy
  15. except:
  16. print("You should install scipy before continuing")
  17. print("Checking for sklearn")
  18. try:
  19. import sklearn
  20. except:
  21. print("You should install sklearn before continuing")
  22. print("Downloading the Enron dataset (this may take a while)")
  23. print("To check on progress, you can cd up one level, then execute <ls -lthr>")
  24. print("Enron dataset should be last item on the list, along with its current size")
  25. print("Download will complete at about 1.82 GB")
  26. import requests
  27. url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
  28. filename = "../enron_mail_20150507.tar.gz"
  29. with open(filename, "wb") as f:
  30. r = requests.get(url)
  31. f.write(r.content)
  32. print("Download Complete!")
  33. print("Unzipping Enron dataset (This may take a while)")
  34. import tarfile
  35. tfile = tarfile.open("../enron_mail_20150507.tar.gz")
  36. tfile.extractall(".")
  37. tfile.close()
  38. print("You're ready to go!")