evaluate_poi_identifier.py

#!/usr/bin/python
"""
Starter code for the evaluation mini-project.
Start by copying the trained/tested POI identifier you built in the
validation mini-project.
This is the second step toward building your POI identifier!
Start by loading/formatting the data...
"""
import os
import joblib
import sys
sys.path.append(os.path.abspath("../tools/"))
from feature_format import featureFormat, targetFeatureSplit
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score


def get_accuracy(cmp1, cmp2):
    """Return the fraction of positions where cmp1 (e.g. true labels, possibly a
    numpy array) matches cmp2 (e.g. predictions)."""
    assert len(cmp1) == len(cmp2)
    total_len = len(cmp1)
    count_correct = 0
    for i in range(total_len):
        # Cast so numpy values and plain Python numbers compare cleanly.
        value = float(cmp1[i])
        if value == cmp2[i]:
            count_correct += 1
    return count_correct / total_len

data_dict = joblib.load(open("../final_project/final_project_dataset.pkl", "rb"))

### add more features to features_list!
features_list = ["poi", "salary"]

data = featureFormat(
    data_dict,
    features_list,
    sort_keys='../tools/python2_lesson14_keys.pkl')
labels, features = targetFeatureSplit(data)

test_size = 0.30
random_state = 42  # or 37
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels,
    test_size=test_size, random_state=random_state)

clf = tree.DecisionTreeClassifier()
clf.fit(features_train, labels_train)
predictions = clf.predict(features_test)

# Baseline: predict 0.0 ("not a POI") for every test point and check accuracy.
whatif = [0.0 for i in range(len(labels_test))]
print(get_accuracy(labels_test, whatif))
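# Sanity check (a sketch added here, not part of the original starter code):
# the same baseline accuracy can be computed with sklearn's accuracy_score,
# which should agree with get_accuracy() above. A high number here only
# reflects how few POIs are in the test set, not a useful classifier.
from sklearn.metrics import accuracy_score
print(accuracy_score(labels_test, whatif))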

# Precision and recall of the decision tree on the test set.
prec = precision_score(labels_test, predictions)
print(prec)
recall = recall_score(labels_test, predictions)
print(recall)

# Hand-made example predictions and true labels for checking precision and recall.
whatif_pred = [0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1]
whatif_true = [0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0]
prec = precision_score(whatif_true, whatif_pred)
print(prec)
recall = recall_score(whatif_true, whatif_pred)
print(recall)
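
# Worked check (a sketch added for illustration, not part of the starter code):
# derive precision and recall for the hand-made lists directly from the
# confusion-matrix counts; the results should match the sklearn values above.
# For these lists TP = 6, FP = 3, FN = 2, so
# precision = TP / (TP + FP) = 6 / 9 ≈ 0.667 and recall = TP / (TP + FN) = 6 / 8 = 0.75.
from sklearn.metrics import confusion_matrix
tn, fp, fn, tp = confusion_matrix(whatif_true, whatif_pred).ravel()
print(tp / (tp + fp))  # precision
print(tp / (tp + fn))  # recall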