scuti
/
ud120-projects


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
							#!/usr/bin/python3

# Environment check: try to import each third-party package and print a hint
# for any that is missing. A missing package is reported but does not stop
# the script.
#
# Only ImportError is caught. A bare ``except:`` would also swallow
# KeyboardInterrupt and SystemExit, so Ctrl-C during a slow import would be
# misreported as "not installed".

print("Checking for nltk")
try:
    import nltk
except ImportError:
    print("You should install nltk before continuing")

print("Checking for numpy")
try:
    import numpy
except ImportError:
    print("You should install numpy before continuing")

print("Checking for scipy")
try:
    import scipy
except ImportError:
    print("You should install scipy before continuing")

print("Checking for sklearn")
try:
    import sklearn
except ImportError:
    print("You should install sklearn before continuing")

# Download the Enron mail archive into the parent directory.
print("Downloading the Enron dataset (this may take a while)")
print("To check on progress, you can cd up one level, then execute <ls -lthr>")
print("Enron dataset should be last item on the list, along with its current size")
print("Download will complete at about 1.82 GB")

import requests
url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
filename = "../enron_mail_20150507.tar.gz"
# Stream the body to disk in chunks instead of buffering the whole archive in
# memory via ``r.content``. Streaming also makes the file grow on disk as data
# arrives, which is what the progress hint printed above relies on.
# The timeout bounds connection setup and each socket read, so a stalled
# transfer raises instead of hanging forever.
with requests.get(url, stream=True, timeout=60) as r:
    # Fail loudly on an HTTP error rather than saving an error page as the
    # archive; the output file is only opened once the response is known good.
    r.raise_for_status()
    with open(filename, "wb") as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)
print("Download Complete!")

# Unpack the downloaded archive into the current directory.
print("Unzipping Enron dataset (This may take a while)")
import tarfile
# The context manager closes the archive even if extraction raises.
with tarfile.open("../enron_mail_20150507.tar.gz") as tfile:
    # The archive came from the network, so prefer the "data" extraction
    # filter where this Python provides it. Older interpreters lack the
    # ``filter`` argument and fall back to the plain call.
    if hasattr(tarfile, "data_filter"):
        tfile.extractall(".", filter="data")
    else:
        tfile.extractall(".")

print("You're ready to go!")