views.py 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119
  1. import bz2
  2. import math
  3. import json
  4. import copy
  5. import numpy as np
  6. import pandas as pd
  7. from datetime import datetime
  8. from dateutil.relativedelta import relativedelta
  9. from typing import Dict, Optional, List, Any, Set
  10. from app import app, w3, ns, rds, known_addresses, tornado_pools, reveal_dists
  11. from app.models import \
  12. Address, ExactMatch, GasPrice, MultiDenom, LinkedTransaction, TornMining, \
  13. TornadoDeposit, TornadoWithdraw, Embedding, DepositTransaction
  14. from app.utils import \
  15. get_anonymity_score, get_order_command, \
  16. entity_to_int, entity_to_str, to_dict, \
  17. heuristic_to_str, is_valid_address, get_today_date_str, \
  18. is_tornado_address, get_equal_user_deposit_txs, find_reveals, \
  19. AddressRequestChecker, TornadoPoolRequestChecker, \
  20. TransactionRequestChecker, PlotRequestChecker, \
  21. default_address_response, default_tornado_response, \
  22. default_transaction_response, default_plot_response, \
  23. NAME_COL, ENTITY_COL, CONF_COL, EOA, DEPOSIT, EXCHANGE, NODE, \
  24. GAS_PRICE_HEUR, DEPO_REUSE_HEUR, DIFF2VEC_HEUR, SAME_NUM_TX_HEUR, \
  25. SAME_ADDR_HEUR, LINKED_TX_HEUR, TORN_MINE_HEUR, DIFF2VEC_HEUR
  26. from app.lib.w3 import query_web3, get_ens_name, resolve_address
  27. from flask import request, Request, Response
  28. from flask import render_template
  29. from sqlalchemy import or_
  30. from app.utils import get_known_attrs, get_display_aliases
# Default number of cluster members returned per page of results.
PAGE_LIMIT = 50
# Hard cap applied to DB count/list queries to bound response time.
HARD_MAX: int = 1000
@app.route('/', methods=['GET'])
@app.route('/index', methods=['GET'])
@app.route('/cluster', methods=['GET'])
@app.route('/transactions', methods=['GET'])
def index():
    """Serve the single-page app shell; all front-end routes share it."""
    return render_template('index.html')
  39. @app.route('/utils/aliases', methods=['GET'])
  40. def alias():
  41. response: str = json.dumps(get_display_aliases())
  42. return Response(response=response)
  43. @app.route('/utils/istornado', methods=['GET'])
  44. def istornado():
  45. address: str = request.args.get('address', '')
  46. address: str = resolve_address(address, ns)
  47. address: str = address.lower()
  48. output: Dict[str, Any] = {
  49. 'data': {
  50. 'address': address,
  51. 'is_tornado': 1,
  52. 'amount': 0,
  53. 'currency': '',
  54. },
  55. 'success': 0,
  56. }
  57. if not is_valid_address(address):
  58. return Response(json.dumps(output))
  59. is_tornado: bool = int(is_tornado_address(address))
  60. if not is_tornado:
  61. amount = None
  62. currency = None
  63. else:
  64. pool: pd.DataFrame = \
  65. tornado_pools[tornado_pools.address == address].iloc[0]
  66. amount, currency = pool.tags.strip().split()
  67. amount = int(amount)
  68. output['data']['is_tornado'] = is_tornado
  69. output['data']['amount'] = amount
  70. output['data']['currency'] = currency
  71. output['success'] = 1
  72. response: str = json.dumps(output)
  73. return Response(response)
  74. @app.route('/utils/gettornadopools', methods=['GET'])
  75. def get_tornado_pools():
  76. pools = []
  77. for _, pool in tornado_pools.iterrows():
  78. # amount, currency = pool.tags.strip().split()
  79. pools.append({
  80. 'address': pool.address,
  81. 'name': pool.tags,
  82. })
  83. output: Dict[str, Any] = {
  84. 'data': {'pools': pools},
  85. 'success': 1,
  86. }
  87. response: str = json.dumps(output)
  88. return Response(response)
  89. @app.route('/search', methods=['GET'])
  90. def search():
  91. address: str = request.args.get('address', '')
  92. # after this call, we should expect address to be an address
  93. address: str = resolve_address(address, ns)
  94. address: str = address.lower()
  95. # do a simple check that the address is valid
  96. if not is_valid_address(address):
  97. return default_address_response()
  98. # check if address is a tornado pool or not
  99. is_tornado: bool = is_tornado_address(address)
  100. # change request object
  101. request.args = dict(request.args)
  102. request.args['address'] = address
  103. if is_tornado:
  104. # ---------------------------------------------------------
  105. # MODE #1
  106. # This is a TCash pool, so we can show specific information
  107. # about compromised addresses via our heuristics.
  108. # ---------------------------------------------------------
  109. response: Response = search_tornado(request)
  110. else:
  111. # ---------------------------------------------------------
  112. # MODE #2
  113. # This is a regular address, so we can search our dataset
  114. # for its cluster and complimentary information.
  115. # ---------------------------------------------------------
  116. response: Response = search_address(request)
  117. return response
  118. @app.route('/search/compromised', methods=['GET'])
  119. def haveibeencompromised():
  120. address: str = request.args.get('address', '')
  121. pool: str = request.args.get('pool', '') # tornado pool address
  122. address: str = resolve_address(address, ns)
  123. output: Dict[str, Any] = {
  124. 'data': {
  125. 'address': address,
  126. 'pool': pool,
  127. 'compromised_size': 0,
  128. 'compromised': [],
  129. },
  130. 'success': 0,
  131. }
  132. if not is_valid_address(address) or not is_valid_address(pool):
  133. return Response(json.dumps(output))
  134. # find all the deposit transactions made by user for this pool
  135. deposits: Optional[List[TornadoDeposit]] = \
  136. TornadoDeposit.query.filter_by(
  137. from_address = address,
  138. tornado_cash_address = pool,
  139. ).all()
  140. deposit_txs: Set[str] = set([d.hash for d in deposits])
  141. # search for these txs in the reveal tables
  142. exact_match_reveals: Set[str] = find_reveals(deposit_txs, ExactMatch)
  143. gas_price_reveals: Set[str] = find_reveals(deposit_txs, GasPrice)
  144. multi_denom_reveals: Set[str] = find_reveals(deposit_txs, MultiDenom)
  145. linked_tx_reveals: Set[str] = find_reveals(deposit_txs, LinkedTransaction)
  146. torn_mine_reveals: Set[str] = find_reveals(deposit_txs, TornMining)
  147. def format_compromised(
  148. exact_match_reveals: Set[str],
  149. gas_price_reveals: Set[str],
  150. multi_denom_reveals: Set[str],
  151. linked_tx_reveals: Set[str],
  152. torn_mine_reveals: Set[str],
  153. ) -> List[Dict[str, Any]]:
  154. compromised: List[Dict[str, Any]] = []
  155. for reveal in exact_match_reveals:
  156. compromised.append({'heuristic': heuristic_to_str(1), 'transaction': reveal})
  157. for reveal in gas_price_reveals:
  158. compromised.append({'heuristic': heuristic_to_str(2), 'transaction': reveal})
  159. for reveal in multi_denom_reveals:
  160. compromised.append({'heuristic': heuristic_to_str(3), 'transaction': reveal})
  161. for reveal in linked_tx_reveals:
  162. compromised.append({'heuristic': heuristic_to_str(4), 'transaction': reveal})
  163. for reveal in torn_mine_reveals:
  164. compromised.append({'heuristic': heuristic_to_str(5), 'transaction': reveal})
  165. return compromised
  166. # add compromised sets to response
  167. compromised: List[Dict[str, Any]] = format_compromised(
  168. exact_match_reveals, gas_price_reveals, multi_denom_reveals,
  169. linked_tx_reveals, torn_mine_reveals)
  170. output['data']['compromised'] = compromised
  171. output['data']['compromised_size'] = len(compromised)
  172. output['success'] = 1
  173. response: str = json.dumps(output)
  174. return Response(response)
  175. def query_diff2vec(node: Embedding, address) -> List[Dict[str, Any]]:
  176. """
  177. Search the embedding table to fetch neighbors from Diff2Vec cluster.
  178. """
  179. cluster: List[Dict[str, Any]] = []
  180. cluster_conf: float = 0
  181. if node is not None:
  182. neighbors: List[int] = json.loads(node.neighbors)
  183. distances: List[float] = json.loads(node.distances)
  184. for neighbor, distance in zip(neighbors, distances):
  185. # swap terms b/c of upload accident
  186. neighbor, distance = distance, neighbor
  187. if neighbor == address: continue # skip
  188. member: Dict[str, Any] = {
  189. 'address': neighbor,
  190. # '_distance': distance,
  191. # add one to make max 1
  192. 'conf': round(float(1./abs(10.*distance+1.)), 3),
  193. 'heuristic': DIFF2VEC_HEUR,
  194. 'entity': NODE,
  195. 'ens_name': get_ens_name(neighbor, ns),
  196. }
  197. cluster.append(member)
  198. cluster_conf += member['conf']
  199. cluster_size: int = len(cluster)
  200. cluster_conf: float = cluster_conf / float(cluster_size)
  201. return cluster, cluster_size, cluster_conf
  202. def compute_anonymity_score(
  203. addr: Optional[Address],
  204. ens_name: Optional[str] = None,
  205. exchange_weight: float = 0.1,
  206. slope: float = 0.1,
  207. extra_cluster_sizes: List[int] = [],
  208. extra_cluster_confs: List[float] = []
  209. ) -> float:
  210. """
  211. Only EOA addresses have an anonymity score. If we get an exchange,
  212. we return an anonymity score of 0. If we get a deposit, we return -1,
  213. which represents N/A.
  214. For EOA addresses, we grade the anonymity by the confidence and number
  215. of other EOA addresses in the same cluster, as well as the confidence
  216. and number of other exchanges in the same cluster (which we find through
  217. the deposits this address interacts with). Exchange interactions are
  218. discounted (by `exchange_weight` factor) compared to other EOAs.
  219. If ens_name is provided and not empty, we cap the anonymity score at 90.
  220. If addr is None, we assume clusters are specified in extra_cluster_*.
  221. """
  222. cluster_confs: List[float] = extra_cluster_sizes
  223. cluster_sizes: List[float] = extra_cluster_confs
  224. if addr is not None:
  225. if addr.entity == entity_to_int(DEPOSIT):
  226. return -1 # represents N/A
  227. elif addr.entity == entity_to_int(EXCHANGE):
  228. return 0 # CEX have no anonymity
  229. assert addr.entity == entity_to_int(EOA), \
  230. f'Unknown entity: {entity_to_str(addr.entity)}'
  231. if addr.user_cluster is not None:
  232. # find all other EOA addresses with same `dar_user_cluster`.
  233. num_cluster: int = Address.query.filter(
  234. Address.user_cluster == addr.user_cluster,
  235. or_(Address.entity == entity_to_int(EOA)),
  236. ).limit(HARD_MAX).count()
  237. cluster_confs.append(addr.conf)
  238. cluster_sizes.append(num_cluster)
  239. # find all DEPOSIT address with same `user_cluster`.
  240. deposits: Optional[List[Address]] = Address.query.filter(
  241. Address.user_cluster == addr.user_cluster,
  242. Address.entity == entity_to_int(DEPOSIT),
  243. ).limit(HARD_MAX).all()
  244. exchanges: Set[str] = set([
  245. deposit.exchange_cluster for deposit in deposits])
  246. cluster_confs.append(addr.conf * exchange_weight)
  247. cluster_sizes.append(len(exchanges))
  248. cluster_confs: np.array = np.array(cluster_confs)
  249. cluster_sizes: np.array = np.array(cluster_sizes)
  250. score: float = get_anonymity_score(
  251. cluster_confs, cluster_sizes, slope = slope)
  252. if ens_name is not None:
  253. if len(ens_name) > 0 and '.eth' in ens_name:
  254. # having an ENS name caps your maximum anonymity score
  255. score: float = min(score, 0.90)
  256. return score
  257. def query_heuristic(address: str, class_: Any) -> Set[str]:
  258. """
  259. Given an address, find out how many times this address' txs
  260. appear in a heuristic. Pass the table class for heuristic.
  261. """
  262. rows: Optional[List[class_]] = \
  263. class_.query.filter_by(address = address).all()
  264. cluster_txs: List[str] = []
  265. if (len(rows) > 0):
  266. clusters: List[int] = list(set([row.cluster for row in rows]))
  267. cluster: List[class_] = \
  268. class_.query.filter(class_.cluster.in_(clusters)).all()
  269. cluster_txs: List[str] = [row.transaction for row in cluster]
  270. return set(cluster_txs) # no duplicates
  271. def query_tornado_stats(address: str) -> Dict[str, Any]:
  272. """
  273. Given a user address, we want to supply a few statistics:
  274. 1) Number of deposits made to Tornado pools.
  275. 2) Number of withdraws made to Tornado pools.
  276. 3) Number of deposits made that are part of a cluster or of a TCash reveal.
  277. """
  278. exact_match_txs: Set[str] = query_heuristic(address, ExactMatch)
  279. gas_price_txs: Set[str] = query_heuristic(address, GasPrice)
  280. multi_denom_txs: Set[str] = query_heuristic(address, MultiDenom)
  281. linked_txs: Set[str] = query_heuristic(address, LinkedTransaction)
  282. torn_mine_txs: Set[str] = query_heuristic(address, TornMining)
  283. reveal_txs: Set[str] = set().union(
  284. exact_match_txs, gas_price_txs, multi_denom_txs,
  285. linked_txs, torn_mine_txs)
  286. # find all txs where the from_address is the current user.
  287. deposits: Optional[List[TornadoDeposit]] = \
  288. TornadoDeposit.query.filter_by(from_address = address).all()
  289. deposit_txs: Set[str] = set([d.hash for d in deposits])
  290. num_deposit: int = len(deposit_txs)
  291. # find all txs where the recipient_address is the current user
  292. withdraws: Optional[List[TornadoWithdraw]] = \
  293. TornadoWithdraw.query.filter_by(recipient_address = address).all()
  294. withdraw_txs: Set[str] = set([w.hash for w in withdraws])
  295. num_withdraw: int = len(withdraw_txs)
  296. all_txs: Set[str] = deposit_txs.union(withdraw_txs)
  297. num_all: int = num_deposit + num_withdraw
  298. num_remain: int = len(all_txs - reveal_txs)
  299. num_remain_exact_match: int = len(all_txs - exact_match_txs)
  300. num_remain_gas_price: int = len(all_txs - gas_price_txs)
  301. num_remain_multi_denom: int = len(all_txs - multi_denom_txs)
  302. num_remain_linked_tx: int = len(all_txs - linked_txs)
  303. num_remain_torn_mine: int = len(all_txs - torn_mine_txs)
  304. num_compromised: int = num_all - num_remain
  305. num_compromised_exact_match = num_all - num_remain_exact_match
  306. num_compromised_gas_price = num_all - num_remain_gas_price
  307. num_compromised_multi_denom = num_all - num_remain_multi_denom
  308. num_compromised_linked_tx = num_all - num_remain_linked_tx
  309. num_compromised_torn_mine = num_all - num_remain_torn_mine
  310. # compute number of txs compromised by TCash heuristics
  311. stats: Dict[str, Any] = dict(
  312. num_deposit = num_deposit,
  313. num_withdraw = num_withdraw,
  314. num_compromised = dict(
  315. all_reveals = num_compromised,
  316. num_compromised_exact_match = num_compromised_exact_match,
  317. num_compromised_gas_price = num_compromised_gas_price,
  318. num_compromised_multi_denom = num_compromised_multi_denom,
  319. num_compromised_linked_tx = num_compromised_linked_tx,
  320. num_compromised_torn_mine = num_compromised_torn_mine,
  321. hovers = dict(
  322. num_compromised_exact_match = '# of deposits to/withdrawals from tornado cash pools linked through the address match heuristic. Address match links transactions if a unique address deposits and withdraws to a Tornado Cash pool.',
  323. num_compromised_gas_price = '# of deposits to/withdrawals from tornado cash pools linked through the unique gas price heuristic. Unique gas price links deposit and withdrawal transactions that use a unique and specific (e.g. 3.1415) gas price.',
  324. num_compromised_multi_denom = '# of deposit/withdrawals into tornado cash pools linked through the multi-denomination reveal. Multi-denomination reveal is when a “source” wallet mixes a specific set of denominations and your “destination” wallet withdraws them all. For example, if you mix 3x 10 ETH, 2x 1 ETH, 1x 0.1 ETH to get 32.1 ETH, you could reveal yourself within the Tornado protocol if no other wallet has mixed this exact denomination set.',
  325. num_compromised_linked_tx = '# of deposits to/withdrawals from tornado cash pools linked through the linked address reveal. Linked address reveal connects wallets that interact outside of Tornado Cash.',
  326. num_compromised_torn_mine = '# of deposits to/withdrawals from tornado cash pools linked through the TORN mining reveal. Careless swapping of Anonymity Points to TORN tokens reveal information of when deposits were made.',
  327. )
  328. ),
  329. num_uncompromised = num_all - num_compromised,
  330. hovers = dict(
  331. num_deposit = '# of deposit transactions into tornado cash pools.',
  332. num_withdraw = '# of withdrawal transactions from tornado cash pools.',
  333. num_compromised = '# of deposits to/withdrawals from tornado cash pools that may be linked through the mis-use of Tornado cash.',
  334. num_uncompromised = '# of deposits to/withdrawals from tornado cash pools that are not potentially linked by the five reveals',
  335. )
  336. )
  337. return stats
def search_address(request: Request) -> Response:
    """
    Master function for serving address requests. This function
    will first check if the request is valid, then find clusters
    corresponding to this address, as well as return auxilary
    information, such as web3 info and Tornado specific info.
    Has support for Redis for fast querying. Even if no clusters
    are found, Tornado and basic info is still returned.
    """
    table_cols: Set[str] = set(Address.__table__.columns.keys())
    # Check if this is a valid request searching for an address
    checker: AddressRequestChecker = AddressRequestChecker(
        request,
        table_cols,
        entity_key = ENTITY_COL,
        conf_key = CONF_COL,
        name_key = NAME_COL,
        default_page = 0,
        default_limit = PAGE_LIMIT,
    )
    is_valid_request: bool = checker.check()
    output: Dict[str, Any] = default_address_response()
    if not is_valid_request:  # if not, bunt
        return Response(output)
    address: str = checker.get('address').lower()
    page: int = checker.get('page')
    size: int = checker.get('limit')
    sort_by: str = checker.get('sort_by')
    desc_sort: str = checker.get('desc_sort')
    filter_by: List[Any] = checker.get('filter_by')
    request_repr: str = checker.to_str()
    if rds.exists(request_repr):  # check if this exists in our cache
        # cached responses are stored bz2-compressed in Redis
        response: str = bz2.decompress(rds.get(request_repr)).decode('utf-8')
        return Response(response=response)
    # --- fill out some of the known response fields ---
    output['data']['query']['address'] = address
    output['data']['metadata']['page'] = page
    output['data']['metadata']['limit'] = size
    for k in output['data']['metadata']['filter_by'].keys():
        output['data']['metadata']['filter_by'][k] = checker.get(f'filter_{k}')
    if len(address) > 0:
        offset: int = page * size
        # --- check web3 for information ---
        web3_resp: Dict[str, Any] = query_web3(address, w3, ns)
        metadata_: Dict[str, Any] = output['data']['query']['metadata']
        output['data']['query']['metadata'] = {**metadata_, **web3_resp}
        # --- check tornado queries ---
        # Note that this is out of the `Address` existence check
        tornado_dict: Dict[str, Any] = query_tornado_stats(address)
        output['data']['tornado']['summary']['address'].update(tornado_dict)
        # --- search for address in DAR and Dff2Vec tables ---
        addr: Optional[Address] = Address.query.filter_by(address = address).first()
        node: Optional[Embedding] = Embedding.query.filter_by(address = address).first()
        # --- Case #1 : address can be found in the DAR Address table ---
        if addr is not None:
            entity: str = entity_to_str(addr.entity)
            if addr.meta_data is None: addr.meta_data = '{}'
            addr_metadata: Dict[str, Any] = json.loads(addr.meta_data)  # load metadata
            if 'ens_name' in addr_metadata: del addr_metadata['ens_name']  # no override
            metadata_: Dict[str, Any] = output['data']['query']['metadata']
            output['data']['query']['metadata'] = {**metadata_, **addr_metadata}
            # store the clusters in here
            cluster: List[Address] = []
            # stores cluster size with filters. This is necessary to reflect changes
            # in # of elements with new filters.
            cluster_size: int = 0
            query_data: Dict[str, Any] = output['data']['query']
            output['data']['query'] = {
                **query_data,
                **to_dict(addr, table_cols, to_transform=[
                    ('entity', entity_to_str),
                    ('heuristic', heuristic_to_str),
                ])
            }
            if entity == EOA:
                # --- compute clusters if you are an EOA ---
                if addr.user_cluster is not None:
                    order_command: Any = get_order_command(sort_by, desc_sort)
                    # find all deposit/eoa addresses in the same cluster & filtering attrs
                    query_: Any = Address.query.filter(
                        Address.user_cluster == addr.user_cluster,
                        *filter_by
                    )
                    cluster_: Optional[List[Address]] = query_\
                        .order_by(order_command)\
                        .offset(offset).limit(size).all()
                    if cluster_ is not None:
                        cluster_: List[Dict[str, Any]] = [
                            to_dict(
                                c,
                                table_cols,
                                to_add={'ens_name': get_ens_name(c.address, ns)},
                                to_remove=['id'],
                                to_transform=[
                                    ('entity', entity_to_str),
                                    ('heuristic', heuristic_to_str),
                                ],
                            )
                            for c in cluster_
                        ]
                        cluster += cluster_
                        # get total number of elements in query
                        cluster_size_: int = query_.limit(HARD_MAX).count()
                        cluster_size += cluster_size_
            elif entity == DEPOSIT:
                # --- compute clusters if you are a deposit ---
                # for deposits, we can both look up all relevant eoa's and
                # all relevant exchanges. These are in two different clusters
                if addr.user_cluster is not None:
                    order_command: Any = get_order_command(sort_by, desc_sort)
                    query_: Any = Address.query.filter(
                        Address.user_cluster == addr.user_cluster,
                        *filter_by
                    )
                    cluster_: Optional[List[Address]] = query_\
                        .order_by(order_command)\
                        .offset(offset).limit(size).all()
                    if cluster_ is not None:
                        cluster_: Dict[str, Any] = [
                            to_dict(
                                c,
                                table_cols,
                                to_add={'ens_name': get_ens_name(c.address, ns)},
                                to_remove=['id'],
                                to_transform=[
                                    ('entity', entity_to_str),
                                    ('heuristic', heuristic_to_str),
                                ],
                            )
                            for c in cluster_
                        ]
                        cluster += cluster_
                        cluster_size_: int = query_.limit(HARD_MAX).count()
                        cluster_size += cluster_size_
            elif entity == EXCHANGE:
                # --- compute clusters if you are an exchange ---
                # find all deposit/exchange addresses in the same cluster
                if addr.exchange_cluster is not None:
                    order_command: Any = get_order_command(sort_by, desc_sort)
                    query_: Any = Address.query.filter(
                        Address.exchange_cluster == addr.exchange_cluster,
                        *filter_by
                    )
                    cluster_: Optional[List[Address]] = query_\
                        .order_by(order_command)\
                        .offset(offset).limit(size).all()
                    if cluster_ is not None:
                        cluster_: Dict[str, Any] = [
                            to_dict(
                                c,
                                table_cols,
                                to_add={'ens_name': get_ens_name(c.address, ns)},
                                to_remove=['id'],
                                to_transform=[
                                    ('entity', entity_to_str),
                                    ('heuristic', heuristic_to_str),
                                ]
                            )
                            for c in cluster_
                        ]
                        cluster += cluster_
                        cluster_size_: int = query_.limit(HARD_MAX).count()
                        cluster_size += cluster_size_
            else:
                raise Exception(f'Entity {entity} not supported.')
            # find Diff2Vec embeddings and add to front of cluster
            # NOTE(review): `node` may be None here — query_diff2vec is
            # expected to tolerate that; confirm against its implementation.
            diff2vec_cluster, diff2vec_size, diff2vec_conf = query_diff2vec(node, address)
            cluster: List[Dict[str, Any]] = diff2vec_cluster + cluster
            cluster_size += len(diff2vec_cluster)
            output['data']['cluster'] = cluster
            output['data']['metadata']['cluster_size'] = cluster_size
            output['data']['metadata']['num_pages'] = int(math.ceil(cluster_size / size))
            # --- compute anonymity score using hyperbolic fn ---
            anon_score = compute_anonymity_score(
                addr,
                ens_name = web3_resp['ens_name'],
                # seed computing anonymity score with diff2vec + tcash reveals
                extra_cluster_sizes = [
                    diff2vec_size,
                    tornado_dict['num_compromised']['num_compromised_exact_match'],
                    tornado_dict['num_compromised']['num_compromised_gas_price'],
                    tornado_dict['num_compromised']['num_compromised_multi_denom'],
                    tornado_dict['num_compromised']['num_compromised_linked_tx'],
                    tornado_dict['num_compromised']['num_compromised_torn_mine'],
                ],
                # per-reveal confidence weights (exact match and gas price
                # are treated as strongest evidence)
                extra_cluster_confs = [
                    diff2vec_conf,
                    1.,
                    1.,
                    0.5,
                    0.25,
                    0.25,
                ],
            )
            anon_score: float = round(anon_score, 3)  # brevity is a virtue
            output['data']['query']['anonymity_score'] = anon_score
        # --- Case #2: address is not in the DAR Address table but is
        # in Embedding (Diff2Vec) table ---
        elif node is not None:
            # find Diff2Vec embeddings and add to front of cluster
            cluster, cluster_size, conf = query_diff2vec(node, address)
            anon_score = compute_anonymity_score(
                None,
                ens_name = web3_resp['ens_name'],
                # seed computing anonymity score with diff2vec + tcash reveals
                extra_cluster_sizes = [
                    cluster_size,
                    tornado_dict['num_compromised']['num_compromised_exact_match'],
                    tornado_dict['num_compromised']['num_compromised_gas_price'],
                    tornado_dict['num_compromised']['num_compromised_multi_denom'],
                    tornado_dict['num_compromised']['num_compromised_linked_tx'],
                    tornado_dict['num_compromised']['num_compromised_torn_mine'],
                ],
                extra_cluster_confs = [
                    conf,
                    1.,
                    1.,
                    0.5,
                    0.25,
                    0.25,
                ],
            )
            anon_score: float = round(anon_score, 3)
            output['data']['query']['anonymity_score'] = anon_score
            output['data']['query']['heuristic'] = DIFF2VEC_HEUR
            output['data']['query']['entity'] = NODE
            output['data']['query']['conf'] = round(conf, 3)
            output['data']['query']['hovers'] = {
                'heuristic': 'this is the primary reveal linking the input address to addresses shown below. It will default to diff2vec, the ML algorithm.',
                'conf': 'indicates confidence (between 0 and 1) that the below addresses are linked to the input address. This is based on how many reveals and the types of reveals that the input address has committed.'
            }
            output['data']['cluster'] = cluster
            output['data']['metadata']['cluster_size'] = cluster_size
            output['data']['metadata']['num_pages'] = int(math.ceil(cluster_size / size))
        # Check if we know existing information about this address
        known_lookup: Dict[str, Any] = get_known_attrs(known_addresses, address)
        if len(known_lookup) > 0:
            query_metadata: Dict[str, Any] = output['data']['query']['metadata']
            output['data']['query']['metadata'] = {**query_metadata, **known_lookup}
            # if you are on the top 20k users list, no anonymity
            output['data']['query']['anonymity_score'] = 0
    # if `addr` doesnt exist, then we assume no clustering
    output['success'] = 1
    response: str = json.dumps(output)
    rds.set(request_repr, bz2.compress(response.encode('utf-8')))  # add to cache
    return Response(response=response)
  584. def search_tornado(request: Request) -> Response:
  585. """
  586. We know the address we are searching for is a Tornado pool, which
  587. means we can provide special information about compromises.
  588. """
  589. checker: TornadoPoolRequestChecker = TornadoPoolRequestChecker(
  590. request,
  591. default_page = 0,
  592. default_limit = PAGE_LIMIT,
  593. )
  594. is_valid_request: bool = checker.check()
  595. output: Dict[str, Any] = default_tornado_response()
  596. if not is_valid_request:
  597. return Response(output)
  598. # check if we can find in cache
  599. request_repr: str = checker.to_str()
  600. if rds.exists(request_repr): # check if this exists in our cache
  601. response: str = bz2.decompress(rds.get(request_repr)).decode('utf-8')
  602. return Response(response=response)
  603. address: str = checker.get('address').lower()
  604. page: int = checker.get('page')
  605. size: int = checker.get('limit')
  606. return_tx: bool = checker.get('return_tx')
  607. output['data']['query']['address'] = address
  608. output['data']['metadata']['page'] = page
  609. output['data']['metadata']['limit'] = size
  610. pool: pd.DataFrame = \
  611. tornado_pools[tornado_pools.address == address].iloc[0]
  612. deposit_txs: Set[str] = get_equal_user_deposit_txs(address)
  613. num_deposits: int = len(deposit_txs)
  614. exact_match_reveals: Set[str] = find_reveals(deposit_txs, ExactMatch)
  615. gas_price_reveals: Set[str] = find_reveals(deposit_txs, GasPrice)
  616. multi_denom_reveals: Set[str] = find_reveals(deposit_txs, MultiDenom)
  617. linked_tx_reveals: Set[str] = find_reveals(deposit_txs, LinkedTransaction)
  618. torn_mine_reveals: Set[str] = find_reveals(deposit_txs, TornMining)
  619. reveal_txs: Set[str] = set().union(
  620. exact_match_reveals, gas_price_reveals, multi_denom_reveals,
  621. linked_tx_reveals, torn_mine_reveals)
  622. num_exact_match_reveals: int = len(exact_match_reveals)
  623. num_gas_price_reveals: int = len(gas_price_reveals)
  624. num_multi_denom_reveals: int = len(multi_denom_reveals)
  625. num_linked_tx_reveals: int = len(linked_tx_reveals)
  626. num_torn_mine_reveals: int = len(torn_mine_reveals)
  627. num_compromised: int = len(reveal_txs)
  628. amount, currency = pool.tags.strip().split()
  629. stats: Dict[str, Any] = {
  630. 'num_deposits': num_deposits,
  631. 'tcash_num_compromised': {
  632. 'all_reveals': num_compromised,
  633. 'exact_match': num_exact_match_reveals,
  634. 'gas_price': num_gas_price_reveals,
  635. 'multi_denom': num_multi_denom_reveals,
  636. 'linked_tx': num_linked_tx_reveals,
  637. 'torn_mine': num_torn_mine_reveals,
  638. },
  639. 'tcash_num_uncompromised': num_deposits - num_compromised,
  640. 'hovers': {
  641. 'tcash_num_uncompromised': '# of deposits to tornado cash pools that are not potentially compromised by the five reveals'
  642. }
  643. }
  644. if return_tx:
  645. output['data']['deposits'] = list(deposit_txs)
  646. output['data']['compromised'] = {
  647. 'exact_match': list(exact_match_reveals),
  648. 'gas_price': list(gas_price_reveals),
  649. 'multi_denom': list(multi_denom_reveals),
  650. 'linked_tx': list(linked_tx_reveals),
  651. 'torn_mine': list(torn_mine_reveals),
  652. }
  653. output['data']['query']['metadata']['amount'] = float(amount)
  654. output['data']['query']['metadata']['currency'] = currency
  655. output['data']['query']['metadata']['stats'] = stats
  656. output['data']['metadata']['compromised_size'] = num_compromised
  657. output['success'] = 1
  658. response: str = json.dumps(output)
  659. rds.set(request_repr, bz2.compress(response.encode('utf-8')))
  660. return Response(response=response)
  661. @app.route('/transaction', methods=['GET'])
  662. def transaction():
  663. return render_template('transaction.html')
  664. def _search_transaction(
  665. address: str,
  666. start_date: datetime,
  667. end_date: datetime,
  668. ) -> Dict[str, List[Dict[str, Any]]]:
  669. def find_tcash_matches(address: str, Heuristic: Any, identifier: int
  670. ) -> List[Dict[str, Any]]:
  671. rows: List[Heuristic] = Heuristic.query.filter(
  672. Heuristic.address == address,
  673. Heuristic.block_ts >= start_date,
  674. Heuristic.block_ts < end_date,
  675. ).all()
  676. rows: List[Dict[str, Any]] = [
  677. {'transaction': row.transaction, 'block': row.block_number,
  678. 'timestamp': row.block_ts, 'heuristic': identifier,
  679. 'metadata': {}} for row in rows]
  680. return rows
  681. def find_dar_matches(address: str) -> List[Dict[str, Any]]:
  682. rows: List[DepositTransaction] = DepositTransaction.query.filter(
  683. DepositTransaction.address == address,
  684. DepositTransaction.block_ts >= start_date,
  685. DepositTransaction.block_ts < end_date,
  686. ).all()
  687. rows: List[Dict[str, Any]] = [
  688. {'transaction': row.transaction, 'block': row.block_number,
  689. 'timestamp': row.block_ts, 'heuristic': DEPO_REUSE_HEUR,
  690. 'metadata': {'deposit': row.deposit}} for row in rows]
  691. return rows
  692. dar_matches: List[Dict[str, Any]] = find_dar_matches(address)
  693. same_addr_matches: List[Dict[str, Any]] = \
  694. find_tcash_matches(address, ExactMatch, SAME_ADDR_HEUR)
  695. gas_price_matches: List[Dict[str, Any]] = \
  696. find_tcash_matches(address, GasPrice, GAS_PRICE_HEUR)
  697. same_num_tx_matches: List[Dict[str, Any]] = \
  698. find_tcash_matches(address, MultiDenom, SAME_NUM_TX_HEUR)
  699. linked_tx_matches: List[Dict[str, Any]] = \
  700. find_tcash_matches(address, LinkedTransaction, LINKED_TX_HEUR)
  701. torn_mine_matches: List[Dict[str, Any]] = \
  702. find_tcash_matches(address, TornMining, TORN_MINE_HEUR)
  703. transactions: List[Dict[str, Any]] = \
  704. dar_matches + same_addr_matches + gas_price_matches + same_num_tx_matches + \
  705. linked_tx_matches + torn_mine_matches
  706. # sort by timestamp
  707. transactions: List[Dict[str, Any]] = sorted(transactions, key = lambda x: x['timestamp'])
  708. def tx_datetime_to_str(raw_transactions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  709. transactions: List[Dict[str, Any]] = []
  710. for tx in raw_transactions:
  711. tx['timestamp'] = tx['timestamp'].strftime('%m/%d/%Y')
  712. transactions.append(tx)
  713. return transactions
  714. # remove datetime objects
  715. transactions: List[Dict[str, Any]] = tx_datetime_to_str(transactions)
  716. output: Dict[str, List[Dict[str, Any]]] = {
  717. 'transactions': transactions,
  718. 'dar_matches': dar_matches,
  719. 'same_addr_matches': same_addr_matches,
  720. 'gas_price_matches': gas_price_matches,
  721. 'same_num_tx_matches': same_num_tx_matches,
  722. 'linked_tx_matches': linked_tx_matches,
  723. 'torn_mine_matches': torn_mine_matches,
  724. }
  725. return output
  726. @app.route('/search/transaction', methods=['GET'])
  727. def search_transaction():
  728. address: str = request.args.get('address', '')
  729. address: str = resolve_address(address, ns)
  730. address: str = address.lower()
  731. if not is_valid_address(address):
  732. return default_transaction_response()
  733. request.args = dict(request.args)
  734. request.args['address'] = address
  735. checker: TransactionRequestChecker = TransactionRequestChecker(
  736. request,
  737. default_page = 0,
  738. default_limit = PAGE_LIMIT,
  739. default_start_date='01/01/2013',
  740. default_end_date=get_today_date_str(),
  741. )
  742. is_valid_request: bool = checker.check()
  743. output: Dict[str, Any] = default_transaction_response()
  744. if not is_valid_request:
  745. return Response(output)
  746. address: str = checker.get('address').lower()
  747. start_date: str = checker.get('start_date')
  748. start_date_obj: datetime = checker.get('start_date_obj')
  749. end_date: str = checker.get('end_date')
  750. end_date_obj: datetime = checker.get('end_date_obj')
  751. page: int = checker.get('page')
  752. size: int = checker.get('limit')
  753. request_repr: str = checker.to_str()
  754. if rds.exists(request_repr):
  755. response: str = bz2.decompress(rds.get(request_repr)).decode('utf-8')
  756. return Response(response=response)
  757. search_output: Dict[str, List[Dict[str, Any]]] = \
  758. _search_transaction(address, start_date_obj, end_date_obj)
  759. transactions: List[Dict[str, Any]] = search_output['transactions']
  760. stats: Dict[str, Dict[str, int]] = {
  761. 'num_transactions': len(transactions),
  762. 'num_ethereum': {
  763. DEPO_REUSE_HEUR: len(search_output['dar_matches']),
  764. },
  765. 'num_tcash': {
  766. SAME_ADDR_HEUR: len(search_output['same_addr_matches']),
  767. GAS_PRICE_HEUR: len(search_output['gas_price_matches']),
  768. SAME_NUM_TX_HEUR: len(search_output['same_num_tx_matches']),
  769. LINKED_TX_HEUR: len(search_output['linked_tx_matches']),
  770. TORN_MINE_HEUR: len(search_output['torn_mine_matches']),
  771. },
  772. }
  773. ranks: Dict[str, Dict[str, int]] = get_relative_rank(stats)
  774. stats['hovers'] = {
  775. 'num_transactions': 'Number of transaction reveals involving this Ethereum address',
  776. 'num_ethereum': 'Number of Ethereum transaction reveals based on the Deposit Address Reuse Reveal',
  777. 'num_tcash': 'Number of reveals by this address using Tornado Cash'
  778. }
  779. stats['num_tcash']['hovers'] = {
  780. SAME_ADDR_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the address match heuristic. Address match links transactions if a unique address deposits and withdraws to a Tornado Cash pool.',
  781. GAS_PRICE_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the unique gas price heuristic. Unique gas price links deposit and withdrawal transactions that use a unique and specific (e.g. 3.1415) gas price.',
  782. SAME_NUM_TX_HEUR: '# of deposit/withdrawals into tornado cash pools linked through the multi-denomination reveal. Multi-denomination reveal is when a “source” wallet mixes a specific set of denominations and your “destination” wallet withdraws them all. For example, if you mix 3x 10 ETH, 2x 1 ETH, 1x 0.1 ETH to get 32.1 ETH, you could reveal yourself within the Tornado protocol if no other wallet has mixed this exact denomination set.',
  783. LINKED_TX_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the linked address reveal. Linked address reveal connects wallets that interact outside of Tornado Cash.',
  784. TORN_MINE_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the TORN mining reveal. Careless swapping of Anonymity Points to TORN tokens reveal information of when deposits were made.',
  785. }
  786. stats['num_ethereum']['hovers'] = dict(
  787. DEPO_REUSE_HEUR = 'when two user addresses send to the same centralized exchange deposit address, they are linked by the deposit address reuse heuristic'
  788. )
  789. web3_resp: Dict[str, Any] = query_web3(address, w3, ns)
  790. addr: Optional[Address] = Address.query.filter_by(address = address).first()
  791. if addr is not None:
  792. node: Optional[Embedding] = Embedding.query.filter_by(address = address).first()
  793. _, diff2vec_size, diff2vec_conf = query_diff2vec(node, address)
  794. tornado_dict: Dict[str, Any] = query_tornado_stats(address)
  795. anon_score = compute_anonymity_score(
  796. addr,
  797. ens_name = web3_resp['ens_name'],
  798. # seed computing anonymity score with diff2vec + tcash reveals
  799. extra_cluster_sizes = [
  800. diff2vec_size,
  801. tornado_dict['num_compromised']['num_compromised_exact_match'],
  802. tornado_dict['num_compromised']['num_compromised_gas_price'],
  803. tornado_dict['num_compromised']['num_compromised_multi_denom'],
  804. tornado_dict['num_compromised']['num_compromised_linked_tx'],
  805. tornado_dict['num_compromised']['num_compromised_torn_mine'],
  806. ],
  807. extra_cluster_confs = [
  808. diff2vec_conf,
  809. 1.,
  810. 1.,
  811. 0.5,
  812. 0.25,
  813. 0.25,
  814. ],
  815. )
  816. anon_score: float = round(anon_score, 3) # brevity is a virtue
  817. output['data']['query']['anonymity_score'] = anon_score
  818. # --
  819. output['data']['query']['address'] = address
  820. output['data']['query']['start_date'] = start_date
  821. output['data']['query']['end_date'] = end_date
  822. output['data']['metadata']['page'] = page
  823. output['data']['metadata']['limit'] = size
  824. output['data']['query']['metadata']['stats'] = stats
  825. output['data']['query']['metadata']['ranks'] = ranks
  826. output['data']['transactions'] = transactions
  827. output['success'] = 1
  828. response: str = json.dumps(output)
  829. rds.set(request_repr, bz2.compress(response.encode('utf-8'))) # add to cache
  830. return Response(response=response)
  831. @app.route('/plot/transaction', methods=['GET'])
  832. def make_weekly_plot():
  833. """
  834. Pass in `transactions` object from `/search/transaction` endpoint.
  835. We treat this as a seperate endpoint to allow for efficient repeated
  836. calls to this w/o requerying `/search/transaction`.
  837. """
  838. address: str = request.args.get('address', '')
  839. address: str = resolve_address(address, ns)
  840. address: str = address.lower()
  841. request.args = dict(request.args)
  842. request.args['address'] = address
  843. if not is_valid_address(address):
  844. return default_plot_response()
  845. window: str = request.args.get('window', '1yr')
  846. checker: PlotRequestChecker = PlotRequestChecker(request, default_window=window)
  847. is_valid_request: bool = checker.check()
  848. output: Dict[str, Any] = default_plot_response()
  849. if not is_valid_request:
  850. return Response(output)
  851. today: datetime = datetime.today()
  852. today: datetime = datetime.strptime(today.strftime('%m/%d/%Y'), '%m/%d/%Y')
  853. if window == '1mth':
  854. delta: relativedelta = relativedelta(months=1)
  855. elif window == '3mth':
  856. delta: relativedelta = relativedelta(months=3)
  857. elif window == '6mth':
  858. delta: relativedelta = relativedelta(months=6)
  859. elif window == '1yr':
  860. delta: relativedelta = relativedelta(months=12)
  861. elif window == '3yr':
  862. delta: relativedelta = relativedelta(months=12*3)
  863. elif window == '5yr':
  864. delta: relativedelta = relativedelta(months=12*5)
  865. else:
  866. raise Exception(f'Window {window} not supported.')
  867. start_date_obj: datetime = today - delta
  868. search_output: Dict[str, List[Dict[str, Any]]] = \
  869. _search_transaction(address, start_date_obj, today)
  870. transactions: List[Dict[str, Any]] = search_output['transactions']
  871. data: List[Dict[str, Any]] = []
  872. cur_start: datetime = copy.copy(start_date_obj)
  873. cur_end: datetime = cur_start + relativedelta(weeks=1)
  874. count: int = 0
  875. while cur_end <= today:
  876. counts: Dict[str, int] = {
  877. DEPO_REUSE_HEUR: 0,
  878. SAME_ADDR_HEUR: 0,
  879. GAS_PRICE_HEUR: 0,
  880. SAME_NUM_TX_HEUR: 0,
  881. LINKED_TX_HEUR: 0,
  882. TORN_MINE_HEUR: 0,
  883. }
  884. for transaction in transactions:
  885. ts: datetime = datetime.strptime(transaction['timestamp'], '%m/%d/%Y')
  886. if (ts >= cur_start) and (ts < cur_end):
  887. counts[transaction['heuristic']] += 1
  888. start_date: str = cur_start.strftime('%m/%d/%Y')
  889. end_date: str = cur_end.strftime('%m/%d/%Y')
  890. row: Dict[str, Any] = {
  891. 'index': count,
  892. 'start_date': start_date,
  893. 'end_date': end_date,
  894. 'name': f'{start_date}-{end_date}',
  895. **counts,
  896. }
  897. data.append(row)
  898. cur_start: datetime = copy.copy(cur_end)
  899. cur_end: datetime = cur_start + relativedelta(weeks=1)
  900. count += 1
  901. output['query']['window'] = window
  902. output['query']['start_time'] = start_date_obj.strftime('%m/%d/%Y')
  903. output['query']['end_time'] = today.strftime('%m/%d/%Y')
  904. output['query']['metadata']['num_points'] = len(data)
  905. output['query']['metadata']['today'] = today.strftime('%m/%d/%Y')
  906. output['data'] = data
  907. output['success'] = 1
  908. response: str = json.dumps(output)
  909. return Response(response=response)
  910. def get_relative_rank(my_stats: Dict[str, int]) -> Dict[str, Dict[str, int]]:
  911. ranks: Dict[str, Dict[str, int]] = {
  912. 'overall': 0,
  913. 'ethereum': {},
  914. 'tcash': {},
  915. 'hovers': {
  916. 'ethereum': 'percentile ranking of reveals by this address vs. other ethereum addresses',
  917. 'tcash': 'percentile ranking of reveals by this address vs. other ethereum addresses that have used Tornado Cash',
  918. }
  919. }
  920. overall: List[float] = []
  921. for heuristic in my_stats['num_ethereum']:
  922. rank: float = compute_rank(my_stats['num_ethereum'][heuristic], reveal_dists[heuristic])
  923. ranks['ethereum'][heuristic] = int(100 * rank)
  924. overall.append(rank)
  925. for heuristic in my_stats['num_ethereum']:
  926. ranks['ethereum'][heuristic] = str(ranks['ethereum'][heuristic]) + '%'
  927. for heuristic in my_stats['num_tcash']:
  928. rank: float = compute_rank(my_stats['num_tcash'][heuristic], reveal_dists[heuristic])
  929. ranks['tcash'][heuristic] = int(100 * rank)
  930. overall.append(rank)
  931. for heuristic in my_stats['num_tcash']:
  932. ranks['tcash'][heuristic] = str(ranks['tcash'][heuristic]) + '%'
  933. overall: int = int(100 * float(np.mean(overall)))
  934. ranks['overall'] = str(overall) + '%'
  935. return ranks
  936. def compute_rank(count: int, dist: Dict[int, int]) -> float:
  937. total: int = int(sum(dist.values()))
  938. bins: List[int] = sorted(list(dist.keys()))
  939. vals: List[int] = [dist[bin] for bin in bins]
  940. bins: np.array = np.array(bins)
  941. vals: np.array = np.array(vals)
  942. cdf: int = int(np.sum(vals[bins < count]))
  943. prob: float = cdf / float(total)
  944. return prob