evaluate_poi_identifier.py

#!/usr/bin/python
"""
Starter code for the evaluation mini-project.
Start by copying the trained/tested POI identifier you built in the
validation mini-project.
This is the second step toward building your POI identifier!
Start by loading/formatting the data...
"""
import os
import joblib
import sys
sys.path.append(os.path.abspath("../tools/"))
from feature_format import featureFormat, targetFeatureSplit
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score


def get_accuracy(cmp1, cmp2):
    """Return the fraction of positions where cmp1 (e.g. true labels, possibly a
    numpy array) matches cmp2 (e.g. predictions)."""
    assert len(cmp1) == len(cmp2)
    total_len = len(cmp1)
    count_correct = 0
    for i in range(total_len):
        # Cast so numpy values and plain Python numbers compare cleanly.
        value = float(cmp1[i])
        if value == cmp2[i]:
            count_correct += 1
    return count_correct / total_len

data_dict = joblib.load(open("../final_project/final_project_dataset.pkl", "rb"))

### add more features to features_list!
features_list = ["poi", "salary"]

data = featureFormat(
    data_dict,
    features_list,
    sort_keys='../tools/python2_lesson14_keys.pkl')
labels, features = targetFeatureSplit(data)

test_size = 0.30
random_state = 42  # or 37
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels,
    test_size=test_size, random_state=random_state)

clf = tree.DecisionTreeClassifier()
clf.fit(features_train, labels_train)
predictions = clf.predict(features_test)

# Baseline: predict 0.0 ("not a POI") for every test point and check accuracy.
whatif = [0.0 for i in range(len(labels_test))]
print(get_accuracy(labels_test, whatif))
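# Sanity check (a sketch added here, not part of the original starter code):
# the same baseline accuracy can be computed with sklearn's accuracy_score,
# which should agree with get_accuracy() above. A high number here only
# reflects how few POIs are in the test set, not a useful classifier.
from sklearn.metrics import accuracy_score
print(accuracy_score(labels_test, whatif))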

# Precision and recall of the decision tree on the test set.
prec = precision_score(labels_test, predictions)
print(prec)
recall = recall_score(labels_test, predictions)
print(recall)

# Hand-made example predictions and true labels for checking precision and recall.
whatif_pred = [0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1]
whatif_true = [0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0]
prec = precision_score(whatif_true, whatif_pred)
print(prec)
recall = recall_score(whatif_true, whatif_pred)
print(recall)
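
# Worked check (a sketch added for illustration, not part of the starter code):
# derive precision and recall for the hand-made lists directly from the
# confusion-matrix counts; the results should match the sklearn values above.
# For these lists TP = 6, FP = 3, FN = 2, so
# precision = TP / (TP + FP) = 6 / 9 ≈ 0.667 and recall = TP / (TP + FN) = 6 / 8 = 0.75.
from sklearn.metrics import confusion_matrix
tn, fp, fn, tp = confusion_matrix(whatif_true, whatif_pred).ravel()
print(tp / (tp + fp))  # precision
print(tp / (tp + fn))  # recall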