#!/usr/bin/python3
# Source repository: scuti/ud120-projects

""" 
    Starter code for exploring the Enron dataset (emails + finances);
    loads up the dataset (pickled dict of dicts).

    The dataset has the form:
    enron_data["LASTNAME FIRSTNAME MIDDLEINITIAL"] = { features_dict }

    {features_dict} is a dictionary of features associated with that person.
    You should explore features_dict as part of the mini-project,
    but here's an example to get you started:

    enron_data["SKILLING JEFFREY K"]["bonus"] = 5600000
    
"""

import joblib

# NOTE(security): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only point this at a trusted copy of the dataset.
# The context manager closes the file handle as soon as loading finishes
# instead of leaving it open for the lifetime of the process.
with open("../final_project/final_project_dataset.pkl", "rb") as dataset_file:
    enron_data = joblib.load(dataset_file)

import argparse
import random

# import sys
# sys.path.append("../tools/")
# import feature_format

def highest_paid(dataset):
    """Find the person of interest (POI) with the largest total payments.

    Only entries whose ``"poi"`` flag is truthy are considered. Entries whose
    ``"total_payments"`` is the string ``'NaN'`` (the marker this script
    compares against for missing values) are skipped, because ordering a
    number against a string raises ``TypeError`` on Python 3.

    Args:
        dataset: mapping of person name -> features dict. Every features
            dict must provide the keys ``"poi"`` and ``"total_payments"``.

    Returns:
        A set ``{name, total_payments}`` describing the best-paid POI, or
        ``{"", 0}`` when no POI has a known total greater than zero. The
        result is a set (kept so existing callers see the same type), which
        means the order of its two elements is not defined.
    """
    top_name = ""
    top_total = 0
    for person, features in dataset.items():
        if not features["poi"]:
            continue
        total_payments = features["total_payments"]
        if total_payments == "NaN":
            # Missing value: cannot be ranked against numeric totals.
            continue
        if top_total < total_payments:
            top_name = person
            top_total = total_payments
    return {top_name, top_total}

def get_person(dataset, interest=True):
    """Print one randomly chosen person whose POI flag equals ``interest``.

    Args:
        dataset: mapping of person name -> features dict. Every features
            dict must provide the key ``"poi"``.
        interest: value the ``"poi"`` flag has to equal for a person to be
            eligible (``True`` selects POIs, ``False`` selects non-POIs).

    Returns:
        None. The chosen name and that person's features dict are printed,
        one per line. When nobody matches, a short notice is printed
        instead.
    """
    candidates = [
        name for name, features in dataset.items()
        if features["poi"] == interest
    ]
    if not candidates:
        # Picking a random index from an empty list would raise; report the
        # situation instead of crashing.
        print("Nobody in the dataset matches poi == %s." % interest)
        return
    chosen = random.choice(candidates)
    print(chosen)
    print(dataset[chosen])

if __name__ == "__main__":
    # Command-line entry point: run exactly one exploratory "action" against
    # the already-loaded enron_data dictionary.
    parser = argparse.ArgumentParser(
        description='Project to familiarize with the Enron data set.'
    )
    parser.add_argument(
        'action',
        nargs='?',
        type=str,
        help='Action to perform: list_features, summary, show_poi, '
             'show_nonpoi or missing_data'
    )
    args = parser.parse_args()

    if args.action == 'list_features':
        print("Showing features available...")
        print(enron_data["PRENTICE JAMES"].keys())
    elif args.action == "summary":
        print(len(enron_data),
              "people are in the data set."
        )
        print(len(enron_data["SKILLING JEFFREY K"]),
              "available features for each person."
        )
        # 'NaN' (a string) is the value compared against for a missing field.
        no_email = [peep for peep in enron_data
                    if enron_data[peep]["email_address"] == 'NaN']
        print(len(no_email),
              "people don't have email addresses."
        )
        print(len(enron_data) - len(no_email),
              "people have an email address."
        )
        have_salary = [peep for peep in enron_data
                       if enron_data[peep]["salary"] != 'NaN']
        print(len(have_salary),
              "people have a quantified salary."
        )
        # highest_paid only looks at people flagged as POI.
        print(highest_paid(enron_data),
              "= the highest paid person"
        )
        print(enron_data["SKILLING JEFFREY K"]["exercised_stock_options"],
              " <-- his value of exercised stock options ($)"
        )
        pois = [p for p in enron_data if enron_data[p]["poi"] == True]
        print("there are %i people of interest in the dataset." % len(pois))
        # print( enron_data["PRENTICE JAMES"]["total_stock_value"] )
        # print( enron_data["COLWELL WESLEY"]["from_this_person_to_poi"])
    elif args.action == "show_poi":
        print("This is someone who is a person of interest.")
        get_person(enron_data, interest=True)
    elif args.action == "show_nonpoi":
        print("This is someone who is not a person of interest.")
        get_person(enron_data, interest=False)
    elif args.action == "missing_data":
        missing_fin_data = [p for p in enron_data
                            if enron_data[p]["total_payments"] == 'NaN']
        how_many = len(missing_fin_data)
        how_many_pct = how_many/len(enron_data) * 100
        print("%i (%0.2f%%) don't have financial data."
              % (how_many, how_many_pct))
        # TODO: also report how many POIs are missing total_payments.
    else:
        # A missing or misspelled action would otherwise do nothing at all;
        # show the usage text so the available actions are discoverable.
        if args.action is not None:
            print("Unknown action: %s" % args.action)
        parser.print_help()