1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- #!/usr/bin/python
- """
- Starter code for the evaluation mini-project.
- Start by copying the trained/tested POI identifier
- that you built in the validation mini-project.
- This is the second step toward building your POI identifier!
- Start by loading/formatting the data...
- """
- import os
- import joblib
- import sys
- sys.path.append(os.path.abspath("../tools/"))
- from feature_format import featureFormat, targetFeatureSplit
- from sklearn import tree
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import precision_score, recall_score
def get_accuracy(cmp1, cmp2):
    """Return the fraction of positions at which cmp1 and cmp2 agree.

    Each element of cmp1 is converted with float() and then compared
    (using ==) with the element of cmp2 at the same position.

    Args:
        cmp1: Sized, iterable collection of values accepted by float()
            (the original note on this function describes it as a
            numpy array).
        cmp2: Sized, iterable collection with the same length as cmp1.

    Returns:
        The number of matching positions divided by the number of
        positions, as a float.

    Raises:
        ValueError: If the inputs differ in length or are empty.
    """
    total_len = len(cmp1)
    # Explicit checks instead of assert: asserts vanish under "python -O".
    if total_len != len(cmp2):
        raise ValueError(
            "length mismatch: %d vs %d" % (total_len, len(cmp2)))
    if total_len == 0:
        raise ValueError("cannot compute the accuracy of empty input")

    count_correct = sum(
        1 for left, right in zip(cmp1, cmp2) if float(left) == right)
    # float() forces true division even on interpreters where "/" between
    # two ints truncates.
    return count_correct / float(total_len)
# Load the dataset dictionary. The context manager closes the file handle as
# soon as loading finishes instead of leaving it open for the whole run.
# NOTE(review): joblib.load unpickles the file, so only use trusted data.
with open("../final_project/final_project_dataset.pkl", "rb") as dataset_file:
    data_dict = joblib.load(dataset_file)

### add more features to features_list!
features_list = ["poi", "salary"]

# NOTE(review): presumably "poi" is the label column and the remaining
# entries are features; confirm against featureFormat/targetFeatureSplit.
data = featureFormat(
    data_dict,
    features_list,
    sort_keys='../tools/python2_lesson14_keys.pkl')
labels, features = targetFeatureSplit(data)

# Hold out 30% of the points for testing; the fixed seed keeps the split
# reproducible between runs.
test_size = 0.30
random_state = 42  # or 37
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels,
    test_size=test_size, random_state=random_state)

clf = tree.DecisionTreeClassifier()
clf.fit(features_train, labels_train)
predictions = clf.predict(features_test)

# Baseline: the accuracy obtained by predicting 0.0 for every test point.
whatif = [0.] * len(labels_test)
print(get_accuracy(labels_test, whatif))

# Precision and recall of the decision tree on the held-out test split.
prec = precision_score(labels_test, predictions)
print(prec)
recall = recall_score(labels_test, predictions)
print(recall)

# Precision and recall for a small hand-written prediction/truth example.
whatif_pred = [0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1]
whatif_true = [0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0]
prec = precision_score(whatif_true, whatif_pred)
print(prec)
recall = recall_score(whatif_true, whatif_pred)
print(recall)
|