views.py 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124
  1. import bz2
  2. import math
  3. import json
  4. import copy
  5. import numpy as np
  6. import pandas as pd
  7. from datetime import datetime
  8. from dateutil.relativedelta import relativedelta
  9. from typing import Dict, Optional, List, Any, Set
  10. from app import app, w3, ns, rds, known_addresses, tornado_pools, reveal_dists
  11. from app.models import \
  12. Address, ExactMatch, GasPrice, MultiDenom, LinkedTransaction, TornMining, \
  13. TornadoDeposit, TornadoWithdraw, Embedding, DepositTransaction
  14. from app.utils import \
  15. get_anonymity_score, get_order_command, \
  16. entity_to_int, entity_to_str, to_dict, conf_to_label, \
  17. heuristic_to_str, is_valid_address, get_today_date_str, \
  18. is_tornado_address, get_equal_user_deposit_txs, find_reveals, \
  19. AddressRequestChecker, TornadoPoolRequestChecker, \
  20. TransactionRequestChecker, PlotRequestChecker, \
  21. default_address_response, default_tornado_response, \
  22. default_transaction_response, default_plot_response, \
  23. NAME_COL, ENTITY_COL, CONF_COL, EOA, DEPOSIT, EXCHANGE, NODE, \
  24. GAS_PRICE_HEUR, DEPO_REUSE_HEUR, DIFF2VEC_HEUR, SAME_NUM_TX_HEUR, \
  25. SAME_ADDR_HEUR, LINKED_TX_HEUR, TORN_MINE_HEUR, DIFF2VEC_HEUR
  26. from app.lib.w3 import query_web3, get_ens_name, resolve_address
  27. from flask import request, Request, Response
  28. from flask import render_template
  29. from sqlalchemy import or_
  30. from app.utils import get_known_attrs, get_display_aliases
# Default number of cluster members returned per page of results.
PAGE_LIMIT = 50
# Hard ceiling on rows fetched/counted for any single cluster query.
HARD_MAX: int = 1000
  33. @app.route('/', methods=['GET'])
  34. @app.route('/index', methods=['GET'])
  35. @app.route('/cluster', methods=['GET'])
  36. @app.route('/transactions', methods=['GET'])
  37. def index():
  38. return render_template('index.html')
  39. @app.route('/utils/aliases', methods=['GET'])
  40. def alias():
  41. response: str = json.dumps(get_display_aliases())
  42. return Response(response=response)
  43. @app.route('/utils/istornado', methods=['GET'])
  44. def istornado():
  45. address: str = request.args.get('address', '')
  46. address: str = resolve_address(address, ns)
  47. address: str = address.lower()
  48. output: Dict[str, Any] = {
  49. 'data': {
  50. 'address': address,
  51. 'is_tornado': 1,
  52. 'amount': 0,
  53. 'currency': '',
  54. },
  55. 'success': 0,
  56. }
  57. if not is_valid_address(address):
  58. return Response(json.dumps(output))
  59. is_tornado: bool = int(is_tornado_address(address))
  60. if not is_tornado:
  61. amount = None
  62. currency = None
  63. else:
  64. pool: pd.DataFrame = \
  65. tornado_pools[tornado_pools.address == address].iloc[0]
  66. amount, currency = pool.tags.strip().split()
  67. amount = int(amount)
  68. output['data']['is_tornado'] = is_tornado
  69. output['data']['amount'] = amount
  70. output['data']['currency'] = currency
  71. output['success'] = 1
  72. response: str = json.dumps(output)
  73. return Response(response)
  74. @app.route('/utils/gettornadopools', methods=['GET'])
  75. def get_tornado_pools():
  76. pools = []
  77. for _, pool in tornado_pools.iterrows():
  78. # amount, currency = pool.tags.strip().split()
  79. pools.append({
  80. 'address': pool.address,
  81. 'name': pool.tags,
  82. })
  83. output: Dict[str, Any] = {
  84. 'data': {'pools': pools},
  85. 'success': 1,
  86. }
  87. response: str = json.dumps(output)
  88. return Response(response)
  89. @app.route('/search', methods=['GET'])
  90. def search():
  91. address: str = request.args.get('address', '')
  92. # after this call, we should expect address to be an address
  93. address: str = resolve_address(address, ns)
  94. address: str = address.lower()
  95. # do a simple check that the address is valid
  96. if not is_valid_address(address):
  97. return default_address_response()
  98. # check if address is a tornado pool or not
  99. is_tornado: bool = is_tornado_address(address)
  100. # change request object
  101. request.args = dict(request.args)
  102. request.args['address'] = address
  103. if is_tornado:
  104. # ---------------------------------------------------------
  105. # MODE #1
  106. # This is a TCash pool, so we can show specific information
  107. # about compromised addresses via our heuristics.
  108. # ---------------------------------------------------------
  109. response: Response = search_tornado(request)
  110. else:
  111. # ---------------------------------------------------------
  112. # MODE #2
  113. # This is a regular address, so we can search our dataset
  114. # for its cluster and complimentary information.
  115. # ---------------------------------------------------------
  116. response: Response = search_address(request)
  117. return response
  118. @app.route('/search/compromised', methods=['GET'])
  119. def haveibeencompromised():
  120. address: str = request.args.get('address', '')
  121. pool: str = request.args.get('pool', '') # tornado pool address
  122. address: str = resolve_address(address, ns)
  123. output: Dict[str, Any] = {
  124. 'data': {
  125. 'address': address,
  126. 'pool': pool,
  127. 'compromised_size': 0,
  128. 'compromised': [],
  129. },
  130. 'success': 0,
  131. }
  132. if not is_valid_address(address) or not is_valid_address(pool):
  133. return Response(json.dumps(output))
  134. # find all the deposit transactions made by user for this pool
  135. deposits: Optional[List[TornadoDeposit]] = \
  136. TornadoDeposit.query.filter_by(
  137. from_address = address,
  138. tornado_cash_address = pool,
  139. ).all()
  140. deposit_txs: Set[str] = set([d.hash for d in deposits])
  141. # search for these txs in the reveal tables
  142. exact_match_reveals: Set[str] = find_reveals(deposit_txs, ExactMatch)
  143. gas_price_reveals: Set[str] = find_reveals(deposit_txs, GasPrice)
  144. multi_denom_reveals: Set[str] = find_reveals(deposit_txs, MultiDenom)
  145. linked_tx_reveals: Set[str] = find_reveals(deposit_txs, LinkedTransaction)
  146. torn_mine_reveals: Set[str] = find_reveals(deposit_txs, TornMining)
  147. def format_compromised(
  148. exact_match_reveals: Set[str],
  149. gas_price_reveals: Set[str],
  150. multi_denom_reveals: Set[str],
  151. linked_tx_reveals: Set[str],
  152. torn_mine_reveals: Set[str],
  153. ) -> List[Dict[str, Any]]:
  154. compromised: List[Dict[str, Any]] = []
  155. for reveal in exact_match_reveals:
  156. compromised.append({'heuristic': heuristic_to_str(1), 'transaction': reveal})
  157. for reveal in gas_price_reveals:
  158. compromised.append({'heuristic': heuristic_to_str(2), 'transaction': reveal})
  159. for reveal in multi_denom_reveals:
  160. compromised.append({'heuristic': heuristic_to_str(3), 'transaction': reveal})
  161. for reveal in linked_tx_reveals:
  162. compromised.append({'heuristic': heuristic_to_str(4), 'transaction': reveal})
  163. for reveal in torn_mine_reveals:
  164. compromised.append({'heuristic': heuristic_to_str(5), 'transaction': reveal})
  165. return compromised
  166. # add compromised sets to response
  167. compromised: List[Dict[str, Any]] = format_compromised(
  168. exact_match_reveals, gas_price_reveals, multi_denom_reveals,
  169. linked_tx_reveals, torn_mine_reveals)
  170. output['data']['compromised'] = compromised
  171. output['data']['compromised_size'] = len(compromised)
  172. output['success'] = 1
  173. response: str = json.dumps(output)
  174. return Response(response)
  175. def query_diff2vec(node: Embedding, address) -> List[Dict[str, Any]]:
  176. """
  177. Search the embedding table to fetch neighbors from Diff2Vec cluster.
  178. """
  179. cluster: List[Dict[str, Any]] = []
  180. cluster_conf: float = 0
  181. if node is not None:
  182. neighbors: List[int] = json.loads(node.neighbors)
  183. distances: List[float] = json.loads(node.distances)
  184. for neighbor, distance in zip(neighbors, distances):
  185. # swap terms b/c of upload accident
  186. neighbor, distance = distance, neighbor
  187. if neighbor == address: continue # skip
  188. cur_conf: float = float(1./abs(10.*distance+1.))
  189. member: Dict[str, Any] = {
  190. 'address': neighbor,
  191. # '_distance': distance,
  192. # add one to make max 1
  193. 'conf': round(cur_conf, 3),
  194. 'conf_label': conf_to_label(cur_conf),
  195. 'heuristic': DIFF2VEC_HEUR,
  196. 'entity': NODE,
  197. 'ens_name': get_ens_name(neighbor, ns),
  198. }
  199. cluster.append(member)
  200. cluster_conf += member['conf']
  201. cluster_size: int = len(cluster)
  202. cluster_conf: float = cluster_conf / float(cluster_size)
  203. return cluster, cluster_size, cluster_conf
  204. def compute_anonymity_score(
  205. addr: Optional[Address],
  206. ens_name: Optional[str] = None,
  207. exchange_weight: float = 0.1,
  208. slope: float = 0.1,
  209. extra_cluster_sizes: List[int] = [],
  210. extra_cluster_confs: List[float] = []
  211. ) -> float:
  212. """
  213. Only EOA addresses have an anonymity score. If we get an exchange,
  214. we return an anonymity score of 0. If we get a deposit, we return -1,
  215. which represents N/A.
  216. For EOA addresses, we grade the anonymity by the confidence and number
  217. of other EOA addresses in the same cluster, as well as the confidence
  218. and number of other exchanges in the same cluster (which we find through
  219. the deposits this address interacts with). Exchange interactions are
  220. discounted (by `exchange_weight` factor) compared to other EOAs.
  221. If ens_name is provided and not empty, we cap the anonymity score at 90.
  222. If addr is None, we assume clusters are specified in extra_cluster_*.
  223. """
  224. cluster_confs: List[float] = extra_cluster_sizes
  225. cluster_sizes: List[float] = extra_cluster_confs
  226. if addr is not None:
  227. if addr.entity == entity_to_int(DEPOSIT):
  228. return -1 # represents N/A
  229. elif addr.entity == entity_to_int(EXCHANGE):
  230. return 0 # CEX have no anonymity
  231. assert addr.entity == entity_to_int(EOA), \
  232. f'Unknown entity: {entity_to_str(addr.entity)}'
  233. if addr.user_cluster is not None:
  234. # find all other EOA addresses with same `dar_user_cluster`.
  235. num_cluster: int = Address.query.filter(
  236. Address.user_cluster == addr.user_cluster,
  237. or_(Address.entity == entity_to_int(EOA)),
  238. ).limit(HARD_MAX).count()
  239. cluster_confs.append(addr.conf)
  240. cluster_sizes.append(num_cluster)
  241. # find all DEPOSIT address with same `user_cluster`.
  242. deposits: Optional[List[Address]] = Address.query.filter(
  243. Address.user_cluster == addr.user_cluster,
  244. Address.entity == entity_to_int(DEPOSIT),
  245. ).limit(HARD_MAX).all()
  246. exchanges: Set[str] = set([
  247. deposit.exchange_cluster for deposit in deposits])
  248. cluster_confs.append(addr.conf * exchange_weight)
  249. cluster_sizes.append(len(exchanges))
  250. cluster_confs: np.array = np.array(cluster_confs)
  251. cluster_sizes: np.array = np.array(cluster_sizes)
  252. score: float = get_anonymity_score(
  253. cluster_confs, cluster_sizes, slope = slope)
  254. if ens_name is not None:
  255. if len(ens_name) > 0 and '.eth' in ens_name:
  256. # having an ENS name caps your maximum anonymity score
  257. score: float = min(score, 0.90)
  258. return score
  259. def query_heuristic(address: str, class_: Any) -> Set[str]:
  260. """
  261. Given an address, find out how many times this address' txs
  262. appear in a heuristic. Pass the table class for heuristic.
  263. """
  264. rows: Optional[List[class_]] = \
  265. class_.query.filter_by(address = address).all()
  266. cluster_txs: List[str] = []
  267. if (len(rows) > 0):
  268. clusters: List[int] = list(set([row.cluster for row in rows]))
  269. cluster: List[class_] = \
  270. class_.query.filter(class_.cluster.in_(clusters)).all()
  271. cluster_txs: List[str] = [row.transaction for row in cluster]
  272. return set(cluster_txs) # no duplicates
  273. def query_tornado_stats(address: str) -> Dict[str, Any]:
  274. """
  275. Given a user address, we want to supply a few statistics:
  276. 1) Number of deposits made to Tornado pools.
  277. 2) Number of withdraws made to Tornado pools.
  278. 3) Number of deposits made that are part of a cluster or of a TCash reveal.
  279. """
  280. exact_match_txs: Set[str] = query_heuristic(address, ExactMatch)
  281. gas_price_txs: Set[str] = query_heuristic(address, GasPrice)
  282. multi_denom_txs: Set[str] = query_heuristic(address, MultiDenom)
  283. linked_txs: Set[str] = query_heuristic(address, LinkedTransaction)
  284. torn_mine_txs: Set[str] = query_heuristic(address, TornMining)
  285. reveal_txs: Set[str] = set().union(
  286. exact_match_txs, gas_price_txs, multi_denom_txs,
  287. linked_txs, torn_mine_txs)
  288. # find all txs where the from_address is the current user.
  289. deposits: Optional[List[TornadoDeposit]] = \
  290. TornadoDeposit.query.filter_by(from_address = address).all()
  291. deposit_txs: Set[str] = set([d.hash for d in deposits])
  292. num_deposit: int = len(deposit_txs)
  293. # find all txs where the recipient_address is the current user
  294. withdraws: Optional[List[TornadoWithdraw]] = \
  295. TornadoWithdraw.query.filter_by(recipient_address = address).all()
  296. withdraw_txs: Set[str] = set([w.hash for w in withdraws])
  297. num_withdraw: int = len(withdraw_txs)
  298. all_txs: Set[str] = deposit_txs.union(withdraw_txs)
  299. num_all: int = num_deposit + num_withdraw
  300. num_remain: int = len(all_txs - reveal_txs)
  301. num_remain_exact_match: int = len(all_txs - exact_match_txs)
  302. num_remain_gas_price: int = len(all_txs - gas_price_txs)
  303. num_remain_multi_denom: int = len(all_txs - multi_denom_txs)
  304. num_remain_linked_tx: int = len(all_txs - linked_txs)
  305. num_remain_torn_mine: int = len(all_txs - torn_mine_txs)
  306. num_compromised: int = num_all - num_remain
  307. num_compromised_exact_match = num_all - num_remain_exact_match
  308. num_compromised_gas_price = num_all - num_remain_gas_price
  309. num_compromised_multi_denom = num_all - num_remain_multi_denom
  310. num_compromised_linked_tx = num_all - num_remain_linked_tx
  311. num_compromised_torn_mine = num_all - num_remain_torn_mine
  312. # compute number of txs compromised by TCash heuristics
  313. stats: Dict[str, Any] = dict(
  314. num_deposit = num_deposit,
  315. num_withdraw = num_withdraw,
  316. num_compromised = dict(
  317. all_reveals = num_compromised,
  318. num_compromised_exact_match = num_compromised_exact_match,
  319. num_compromised_gas_price = num_compromised_gas_price,
  320. num_compromised_multi_denom = num_compromised_multi_denom,
  321. num_compromised_linked_tx = num_compromised_linked_tx,
  322. num_compromised_torn_mine = num_compromised_torn_mine,
  323. hovers = dict(
  324. num_compromised_exact_match = '# of deposits to/withdrawals from tornado cash pools linked through the address match heuristic. Address match links transactions if a unique address deposits and withdraws to a Tornado Cash pool.',
  325. num_compromised_gas_price = '# of deposits to/withdrawals from tornado cash pools linked through the unique gas price heuristic. Unique gas price links deposit and withdrawal transactions that use a unique and specific (e.g. 3.1415) gas price.',
  326. num_compromised_multi_denom = '# of deposit/withdrawals into tornado cash pools linked through the multi-denomination reveal. Multi-denomination reveal is when a “source” wallet mixes a specific set of denominations and your “destination” wallet withdraws them all. For example, if you mix 3x 10 ETH, 2x 1 ETH, 1x 0.1 ETH to get 32.1 ETH, you could reveal yourself within the Tornado protocol if no other wallet has mixed this exact denomination set.',
  327. num_compromised_linked_tx = '# of deposits to/withdrawals from tornado cash pools linked through the linked address reveal. Linked address reveal connects wallets that interact outside of Tornado Cash.',
  328. num_compromised_torn_mine = '# of deposits to/withdrawals from tornado cash pools linked through the TORN mining reveal. Careless swapping of Anonymity Points to TORN tokens reveal information of when deposits were made.',
  329. )
  330. ),
  331. num_uncompromised = num_all - num_compromised,
  332. hovers = dict(
  333. num_deposit = '# of deposit transactions into tornado cash pools.',
  334. num_withdraw = '# of withdrawal transactions from tornado cash pools.',
  335. num_compromised = '# of deposits to/withdrawals from tornado cash pools that may be linked through the mis-use of Tornado cash.',
  336. num_uncompromised = '# of deposits to/withdrawals from tornado cash pools that are not potentially linked by the five reveals',
  337. )
  338. )
  339. return stats
def search_address(request: Request) -> Response:
    """
    Master function for serving address requests. This function
    will first check if the request is valid, then find clusters
    corresponding to this address, as well as return auxilary
    information, such as web3 info and Tornado specific info.
    Has support for Redis for fast querying. Even if no clusters
    are found, Tornado and basic info is still returned.
    """
    table_cols: Set[str] = set(Address.__table__.columns.keys())
    # Check if this is a valid request searching for an address
    checker: AddressRequestChecker = AddressRequestChecker(
        request,
        table_cols,
        entity_key = ENTITY_COL,
        conf_key = CONF_COL,
        name_key = NAME_COL,
        default_page = 0,
        default_limit = PAGE_LIMIT,
    )
    is_valid_request: bool = checker.check()
    output: Dict[str, Any] = default_address_response()
    if not is_valid_request: # if not, bunt
        # NOTE(review): `output` is a dict here, while every other exit
        # returns a JSON string — confirm Flask/Response handles this.
        return Response(output)
    # pull the validated request parameters out of the checker
    address: str = checker.get('address').lower()
    page: int = checker.get('page')
    size: int = checker.get('limit')
    sort_by: str = checker.get('sort_by')
    desc_sort: str = checker.get('desc_sort')
    filter_by: List[Any] = checker.get('filter_by')
    request_repr: str = checker.to_str()
    if rds.exists(request_repr): # check if this exists in our cache
        # cached responses are stored bz2-compressed; serve verbatim
        response: str = bz2.decompress(rds.get(request_repr)).decode('utf-8')
        return Response(response=response)
    # --- fill out some of the known response fields ---
    output['data']['query']['address'] = address
    output['data']['metadata']['page'] = page
    output['data']['metadata']['limit'] = size
    for k in output['data']['metadata']['filter_by'].keys():
        output['data']['metadata']['filter_by'][k] = checker.get(f'filter_{k}')
    if len(address) > 0:
        offset: int = page * size
        # --- check web3 for information ---
        web3_resp: Dict[str, Any] = query_web3(address, w3, ns)
        metadata_: Dict[str, Any] = output['data']['query']['metadata']
        output['data']['query']['metadata'] = {**metadata_, **web3_resp}
        # --- check tornado queries ---
        # Note that this is out of the `Address` existence check
        tornado_dict: Dict[str, Any] = query_tornado_stats(address)
        output['data']['tornado']['summary']['address'].update(tornado_dict)
        # --- search for address in DAR and Dff2Vec tables ---
        addr: Optional[Address] = Address.query.filter_by(address = address).first()
        node: Optional[Embedding] = Embedding.query.filter_by(address = address).first()
        # --- Case #1 : address can be found in the DAR Address table ---
        if addr is not None:
            entity: str = entity_to_str(addr.entity)
            if addr.meta_data is None: addr.meta_data = '{}'
            addr_metadata: Dict[str, Any] = json.loads(addr.meta_data) # load metadata
            if 'ens_name' in addr_metadata: del addr_metadata['ens_name'] # no override
            metadata_: Dict[str, Any] = output['data']['query']['metadata']
            output['data']['query']['metadata'] = {**metadata_, **addr_metadata}
            # store the clusters in here
            cluster: List[Address] = []
            # stores cluster size with filters. This is necessary to reflect changes
            # in # of elements with new filters.
            cluster_size: int = 0
            # merge the Address row's own columns into the query section
            query_data: Dict[str, Any] = output['data']['query']
            output['data']['query'] = {
                **query_data,
                **to_dict(addr, table_cols, to_transform=[
                    ('entity', entity_to_str),
                    ('heuristic', heuristic_to_str),
                ])
            }
            if entity == EOA:
                # --- compute clusters if you are an EOA ---
                if addr.user_cluster is not None:
                    order_command: Any = get_order_command(sort_by, desc_sort)
                    # find all deposit/eoa addresses in the same cluster & filtering attrs
                    query_: Any = Address.query.filter(
                        Address.user_cluster == addr.user_cluster,
                        *filter_by
                    )
                    cluster_: Optional[List[Address]] = query_\
                        .order_by(order_command)\
                        .offset(offset).limit(size).all()
                    if cluster_ is not None:
                        # serialize each cluster member, attaching its ENS name
                        cluster_: List[Dict[str, Any]] = [
                            to_dict(
                                c,
                                table_cols,
                                to_add={'ens_name': get_ens_name(c.address, ns)},
                                to_remove=['id'],
                                to_transform=[
                                    ('entity', entity_to_str),
                                    ('heuristic', heuristic_to_str),
                                ],
                            )
                            for c in cluster_
                        ]
                        cluster += cluster_
                    # get total number of elements in query
                    cluster_size_: int = query_.limit(HARD_MAX).count()
                    cluster_size += cluster_size_
            elif entity == DEPOSIT:
                # --- compute clusters if you are a deposit ---
                # for deposits, we can both look up all relevant eoa's and
                # all relevant exchanges. These are in two different clusters
                if addr.user_cluster is not None:
                    order_command: Any = get_order_command(sort_by, desc_sort)
                    query_: Any = Address.query.filter(
                        Address.user_cluster == addr.user_cluster,
                        *filter_by
                    )
                    cluster_: Optional[List[Address]] = query_\
                        .order_by(order_command)\
                        .offset(offset).limit(size).all()
                    if cluster_ is not None:
                        cluster_: Dict[str, Any] = [
                            to_dict(
                                c,
                                table_cols,
                                to_add={'ens_name': get_ens_name(c.address, ns)},
                                to_remove=['id'],
                                to_transform=[
                                    ('entity', entity_to_str),
                                    ('heuristic', heuristic_to_str),
                                ],
                            )
                            for c in cluster_
                        ]
                        cluster += cluster_
                    cluster_size_: int = query_.limit(HARD_MAX).count()
                    cluster_size += cluster_size_
            elif entity == EXCHANGE:
                # --- compute clusters if you are an exchange ---
                # find all deposit/exchange addresses in the same cluster
                if addr.exchange_cluster is not None:
                    order_command: Any = get_order_command(sort_by, desc_sort)
                    query_: Any = Address.query.filter(
                        Address.exchange_cluster == addr.exchange_cluster,
                        *filter_by
                    )
                    cluster_: Optional[List[Address]] = query_\
                        .order_by(order_command)\
                        .offset(offset).limit(size).all()
                    if cluster_ is not None:
                        cluster_: Dict[str, Any] = [
                            to_dict(
                                c,
                                table_cols,
                                to_add={'ens_name': get_ens_name(c.address, ns)},
                                to_remove=['id'],
                                to_transform=[
                                    ('entity', entity_to_str),
                                    ('heuristic', heuristic_to_str),
                                ]
                            )
                            for c in cluster_
                        ]
                        cluster += cluster_
                    cluster_size_: int = query_.limit(HARD_MAX).count()
                    cluster_size += cluster_size_
            else:
                raise Exception(f'Entity {entity} not supported.')
            # find Diff2Vec embeddings and add to front of cluster
            diff2vec_cluster, diff2vec_size, diff2vec_conf = query_diff2vec(node, address)
            cluster: List[Dict[str, Any]] = diff2vec_cluster + cluster
            cluster_size += len(diff2vec_cluster)
            output['data']['cluster'] = cluster
            output['data']['metadata']['cluster_size'] = cluster_size
            output['data']['metadata']['num_pages'] = int(math.ceil(cluster_size / size))
            # --- compute anonymity score using hyperbolic fn ---
            anon_score = compute_anonymity_score(
                addr,
                ens_name = web3_resp['ens_name'],
                # seed computing anonymity score with diff2vec + tcash reveals
                extra_cluster_sizes = [
                    diff2vec_size,
                    tornado_dict['num_compromised']['num_compromised_exact_match'],
                    tornado_dict['num_compromised']['num_compromised_gas_price'],
                    tornado_dict['num_compromised']['num_compromised_multi_denom'],
                    tornado_dict['num_compromised']['num_compromised_linked_tx'],
                    tornado_dict['num_compromised']['num_compromised_torn_mine'],
                ],
                # per-reveal weights; exact-match and gas-price count fully,
                # the remaining reveals are discounted
                extra_cluster_confs = [
                    diff2vec_conf,
                    1.,
                    1.,
                    0.5,
                    0.25,
                    0.25,
                ],
            )
            anon_score: float = round(anon_score, 3) # brevity is a virtue
            output['data']['query']['anonymity_score'] = anon_score
        # --- Case #2: address is not in the DAR Address table but is
        # in Embedding (Diff2Vec) table ---
        elif node is not None:
            # find Diff2Vec embeddings and add to front of cluster
            cluster, cluster_size, conf = query_diff2vec(node, address)
            anon_score = compute_anonymity_score(
                None,
                ens_name = web3_resp['ens_name'],
                # seed computing anonymity score with diff2vec + tcash reveals
                extra_cluster_sizes = [
                    cluster_size,
                    tornado_dict['num_compromised']['num_compromised_exact_match'],
                    tornado_dict['num_compromised']['num_compromised_gas_price'],
                    tornado_dict['num_compromised']['num_compromised_multi_denom'],
                    tornado_dict['num_compromised']['num_compromised_linked_tx'],
                    tornado_dict['num_compromised']['num_compromised_torn_mine'],
                ],
                extra_cluster_confs = [
                    conf,
                    1.,
                    1.,
                    0.5,
                    0.25,
                    0.25,
                ],
            )
            anon_score: float = round(anon_score, 3)
            output['data']['query']['anonymity_score'] = anon_score
            # the diff2vec embedding is the only reveal linking this address
            output['data']['query']['heuristic'] = DIFF2VEC_HEUR
            output['data']['query']['entity'] = NODE
            output['data']['query']['conf'] = round(conf, 3)
            output['data']['query']['conf_label'] = conf_to_label(conf)
            output['data']['query']['hovers'] = {
                'heuristic': 'this is the primary reveal linking the input address to addresses shown below. It will default to diff2vec, the ML algorithm.',
                'conf': 'indicates confidence (between 0 and 1) that the below addresses are linked to the input address. This is based on how many reveals and the types of reveals that the input address has committed.'
            }
            output['data']['cluster'] = cluster
            output['data']['metadata']['cluster_size'] = cluster_size
            output['data']['metadata']['num_pages'] = int(math.ceil(cluster_size / size))
        # Check if we know existing information about this address
        known_lookup: Dict[str, Any] = get_known_attrs(known_addresses, address)
        if len(known_lookup) > 0:
            query_metadata: Dict[str, Any] = output['data']['query']['metadata']
            output['data']['query']['metadata'] = {**query_metadata, **known_lookup}
            # if you are on the top 20k users list, no anonymity
            output['data']['query']['anonymity_score'] = 0
        # if `addr` doesnt exist, then we assume no clustering
        output['success'] = 1
    response: str = json.dumps(output)
    rds.set(request_repr, bz2.compress(response.encode('utf-8'))) # add to cache
    return Response(response=response)
  587. def search_tornado(request: Request) -> Response:
  588. """
  589. We know the address we are searching for is a Tornado pool, which
  590. means we can provide special information about compromises.
  591. """
  592. checker: TornadoPoolRequestChecker = TornadoPoolRequestChecker(
  593. request,
  594. default_page = 0,
  595. default_limit = PAGE_LIMIT,
  596. )
  597. is_valid_request: bool = checker.check()
  598. output: Dict[str, Any] = default_tornado_response()
  599. if not is_valid_request:
  600. return Response(output)
  601. # check if we can find in cache
  602. request_repr: str = checker.to_str()
  603. if rds.exists(request_repr): # check if this exists in our cache
  604. response: str = bz2.decompress(rds.get(request_repr)).decode('utf-8')
  605. return Response(response=response)
  606. address: str = checker.get('address').lower()
  607. page: int = checker.get('page')
  608. size: int = checker.get('limit')
  609. return_tx: bool = checker.get('return_tx')
  610. output['data']['query']['address'] = address
  611. output['data']['metadata']['page'] = page
  612. output['data']['metadata']['limit'] = size
  613. pool: pd.DataFrame = \
  614. tornado_pools[tornado_pools.address == address].iloc[0]
  615. deposit_txs: Set[str] = get_equal_user_deposit_txs(address)
  616. num_deposits: int = len(deposit_txs)
  617. exact_match_reveals: Set[str] = find_reveals(deposit_txs, ExactMatch)
  618. gas_price_reveals: Set[str] = find_reveals(deposit_txs, GasPrice)
  619. multi_denom_reveals: Set[str] = find_reveals(deposit_txs, MultiDenom)
  620. linked_tx_reveals: Set[str] = find_reveals(deposit_txs, LinkedTransaction)
  621. torn_mine_reveals: Set[str] = find_reveals(deposit_txs, TornMining)
  622. reveal_txs: Set[str] = set().union(
  623. exact_match_reveals, gas_price_reveals, multi_denom_reveals,
  624. linked_tx_reveals, torn_mine_reveals)
  625. num_exact_match_reveals: int = len(exact_match_reveals)
  626. num_gas_price_reveals: int = len(gas_price_reveals)
  627. num_multi_denom_reveals: int = len(multi_denom_reveals)
  628. num_linked_tx_reveals: int = len(linked_tx_reveals)
  629. num_torn_mine_reveals: int = len(torn_mine_reveals)
  630. num_compromised: int = len(reveal_txs)
  631. amount, currency = pool.tags.strip().split()
  632. stats: Dict[str, Any] = {
  633. 'num_deposits': num_deposits,
  634. 'tcash_num_compromised': {
  635. 'all_reveals': num_compromised,
  636. 'exact_match': num_exact_match_reveals,
  637. 'gas_price': num_gas_price_reveals,
  638. 'multi_denom': num_multi_denom_reveals,
  639. 'linked_tx': num_linked_tx_reveals,
  640. 'torn_mine': num_torn_mine_reveals,
  641. },
  642. 'tcash_num_uncompromised': num_deposits - num_compromised,
  643. 'hovers': {
  644. 'tcash_num_uncompromised': '# of deposits to tornado cash pools that are not potentially compromised by the five reveals'
  645. }
  646. }
  647. if return_tx:
  648. output['data']['deposits'] = list(deposit_txs)
  649. output['data']['compromised'] = {
  650. 'exact_match': list(exact_match_reveals),
  651. 'gas_price': list(gas_price_reveals),
  652. 'multi_denom': list(multi_denom_reveals),
  653. 'linked_tx': list(linked_tx_reveals),
  654. 'torn_mine': list(torn_mine_reveals),
  655. }
  656. output['data']['query']['metadata']['amount'] = float(amount)
  657. output['data']['query']['metadata']['currency'] = currency
  658. output['data']['query']['metadata']['stats'] = stats
  659. output['data']['metadata']['compromised_size'] = num_compromised
  660. output['success'] = 1
  661. response: str = json.dumps(output)
  662. rds.set(request_repr, bz2.compress(response.encode('utf-8')))
  663. return Response(response=response)
  664. @app.route('/transaction', methods=['GET'])
  665. def transaction():
  666. return render_template('transaction.html')
  667. def _search_transaction(
  668. address: str,
  669. start_date: datetime,
  670. end_date: datetime,
  671. ) -> Dict[str, List[Dict[str, Any]]]:
  672. def find_tcash_matches(address: str, Heuristic: Any, identifier: int
  673. ) -> List[Dict[str, Any]]:
  674. rows: List[Heuristic] = Heuristic.query.filter(
  675. Heuristic.address == address,
  676. Heuristic.block_ts >= start_date,
  677. Heuristic.block_ts < end_date,
  678. ).all()
  679. rows: List[Dict[str, Any]] = [
  680. {'transaction': row.transaction, 'block': row.block_number,
  681. 'timestamp': row.block_ts, 'heuristic': identifier,
  682. 'metadata': {}} for row in rows]
  683. return rows
  684. def find_dar_matches(address: str) -> List[Dict[str, Any]]:
  685. rows: List[DepositTransaction] = DepositTransaction.query.filter(
  686. DepositTransaction.address == address,
  687. DepositTransaction.block_ts >= start_date,
  688. DepositTransaction.block_ts < end_date,
  689. ).all()
  690. rows: List[Dict[str, Any]] = [
  691. {'transaction': row.transaction, 'block': row.block_number,
  692. 'timestamp': row.block_ts, 'heuristic': DEPO_REUSE_HEUR,
  693. 'metadata': {'deposit': row.deposit}} for row in rows]
  694. return rows
  695. dar_matches: List[Dict[str, Any]] = find_dar_matches(address)
  696. same_addr_matches: List[Dict[str, Any]] = \
  697. find_tcash_matches(address, ExactMatch, SAME_ADDR_HEUR)
  698. gas_price_matches: List[Dict[str, Any]] = \
  699. find_tcash_matches(address, GasPrice, GAS_PRICE_HEUR)
  700. same_num_tx_matches: List[Dict[str, Any]] = \
  701. find_tcash_matches(address, MultiDenom, SAME_NUM_TX_HEUR)
  702. linked_tx_matches: List[Dict[str, Any]] = \
  703. find_tcash_matches(address, LinkedTransaction, LINKED_TX_HEUR)
  704. torn_mine_matches: List[Dict[str, Any]] = \
  705. find_tcash_matches(address, TornMining, TORN_MINE_HEUR)
  706. transactions: List[Dict[str, Any]] = \
  707. dar_matches + same_addr_matches + gas_price_matches + same_num_tx_matches + \
  708. linked_tx_matches + torn_mine_matches
  709. # sort by timestamp
  710. transactions: List[Dict[str, Any]] = sorted(transactions, key = lambda x: x['timestamp'])
  711. def tx_datetime_to_str(raw_transactions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  712. transactions: List[Dict[str, Any]] = []
  713. for tx in raw_transactions:
  714. tx['timestamp'] = tx['timestamp'].strftime('%m/%d/%Y')
  715. transactions.append(tx)
  716. return transactions
  717. # remove datetime objects
  718. transactions: List[Dict[str, Any]] = tx_datetime_to_str(transactions)
  719. output: Dict[str, List[Dict[str, Any]]] = {
  720. 'transactions': transactions,
  721. 'dar_matches': dar_matches,
  722. 'same_addr_matches': same_addr_matches,
  723. 'gas_price_matches': gas_price_matches,
  724. 'same_num_tx_matches': same_num_tx_matches,
  725. 'linked_tx_matches': linked_tx_matches,
  726. 'torn_mine_matches': torn_mine_matches,
  727. }
  728. return output
@app.route('/search/transaction', methods=['GET'])
def search_transaction():
    """
    Search all reveal heuristics for transactions tied to `address`
    within the requested date window; attach per-heuristic counts,
    percentile ranks, and (when DB/embedding rows exist) an anonymity
    score. Responses are bz2-compressed and cached in redis.
    """
    # normalize the raw query param: resolve ENS names, lowercase hex
    address: str = request.args.get('address', '')
    address: str = resolve_address(address, ns)
    address: str = address.lower()
    if not is_valid_address(address):
        return default_transaction_response()
    # write the normalized address back so the checker validates it
    request.args = dict(request.args)
    request.args['address'] = address
    checker: TransactionRequestChecker = TransactionRequestChecker(
        request,
        default_page = 0,
        default_limit = PAGE_LIMIT,
        default_start_date='01/01/2013',
        default_end_date=get_today_date_str(),
    )
    is_valid_request: bool = checker.check()
    output: Dict[str, Any] = default_transaction_response()
    if not is_valid_request:
        return Response(output)
    address: str = checker.get('address').lower()
    start_date: str = checker.get('start_date')
    start_date_obj: datetime = checker.get('start_date_obj')
    end_date: str = checker.get('end_date')
    end_date_obj: datetime = checker.get('end_date_obj')
    page: int = checker.get('page')
    size: int = checker.get('limit')
    request_repr: str = checker.to_str()
    if rds.exists(request_repr):  # serve bz2-compressed cached response
        response: str = bz2.decompress(rds.get(request_repr)).decode('utf-8')
        return Response(response=response)
    # query every heuristic table for reveals in [start, end)
    search_output: Dict[str, List[Dict[str, Any]]] = \
        _search_transaction(address, start_date_obj, end_date_obj)
    transactions: List[Dict[str, Any]] = search_output['transactions']
    # reveal counts per heuristic, split into ethereum vs tornado cash
    stats: Dict[str, Dict[str, int]] = {
        'num_transactions': len(transactions),
        'num_ethereum': {
            DEPO_REUSE_HEUR: len(search_output['dar_matches']),
        },
        'num_tcash': {
            SAME_ADDR_HEUR: len(search_output['same_addr_matches']),
            GAS_PRICE_HEUR: len(search_output['gas_price_matches']),
            SAME_NUM_TX_HEUR: len(search_output['same_num_tx_matches']),
            LINKED_TX_HEUR: len(search_output['linked_tx_matches']),
            TORN_MINE_HEUR: len(search_output['torn_mine_matches']),
        },
    }
    # NOTE: ranks must be computed before the 'hovers' keys are inserted
    # below — get_relative_rank iterates the sub-dicts of `stats`
    ranks: Dict[str, Dict[str, int]] = get_relative_rank(stats)
    # UI tooltip copy for the stats panel
    stats['hovers'] = {
        'num_transactions': 'Number of transaction reveals involving this Ethereum address',
        'num_ethereum': 'Number of Ethereum transaction reveals based on the Deposit Address Reuse Reveal',
        'num_tcash': 'Number of reveals by this address using Tornado Cash'
    }
    stats['num_tcash']['hovers'] = {
        SAME_ADDR_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the address match heuristic. Address match links transactions if a unique address deposits and withdraws to a Tornado Cash pool.',
        GAS_PRICE_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the unique gas price heuristic. Unique gas price links deposit and withdrawal transactions that use a unique and specific (e.g. 3.1415) gas price.',
        SAME_NUM_TX_HEUR: '# of deposit/withdrawals into tornado cash pools linked through the multi-denomination reveal. Multi-denomination reveal is when a “source” wallet mixes a specific set of denominations and your “destination” wallet withdraws them all. For example, if you mix 3x 10 ETH, 2x 1 ETH, 1x 0.1 ETH to get 32.1 ETH, you could reveal yourself within the Tornado protocol if no other wallet has mixed this exact denomination set.',
        LINKED_TX_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the linked address reveal. Linked address reveal connects wallets that interact outside of Tornado Cash.',
        TORN_MINE_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the TORN mining reveal. Careless swapping of Anonymity Points to TORN tokens reveal information of when deposits were made.',
    }
    stats['num_ethereum']['hovers'] = dict(
        DEPO_REUSE_HEUR = 'when two user addresses send to the same centralized exchange deposit address, they are linked by the deposit address reuse heuristic'
    )
    # ENS / on-chain info for this address
    web3_resp: Dict[str, Any] = query_web3(address, w3, ns)
    addr: Optional[Address] = Address.query.filter_by(address = address).first()
    node: Optional[Embedding] = Embedding.query.filter_by(address = address).first()
    if addr is not None or node is not None:
        # anonymity score only computed when we know something about the
        # address (a DB row or a diff2vec embedding)
        _, diff2vec_size, diff2vec_conf = query_diff2vec(node, address)
        tornado_dict: Dict[str, Any] = query_tornado_stats(address)
        anon_score = compute_anonymity_score(
            addr,
            ens_name = web3_resp['ens_name'],
            # seed computing anonymity score with diff2vec + tcash reveals
            extra_cluster_sizes = [
                diff2vec_size,
                tornado_dict['num_compromised']['num_compromised_exact_match'],
                tornado_dict['num_compromised']['num_compromised_gas_price'],
                tornado_dict['num_compromised']['num_compromised_multi_denom'],
                tornado_dict['num_compromised']['num_compromised_linked_tx'],
                tornado_dict['num_compromised']['num_compromised_torn_mine'],
            ],
            # per-source confidence weights — presumably hand-tuned;
            # TODO(review): confirm against compute_anonymity_score docs
            extra_cluster_confs = [
                diff2vec_conf,
                1.,
                1.,
                0.5,
                0.25,
                0.25,
            ],
        )
        anon_score: float = round(anon_score, 3) # brevity is a virtue
        output['data']['query']['anonymity_score'] = anon_score
    # -- assemble the final response payload
    output['data']['query']['address'] = address
    output['data']['query']['start_date'] = start_date
    output['data']['query']['end_date'] = end_date
    output['data']['metadata']['page'] = page
    output['data']['metadata']['limit'] = size
    output['data']['query']['metadata']['stats'] = stats
    output['data']['query']['metadata']['ranks'] = ranks
    output['data']['transactions'] = transactions
    output['success'] = 1
    response: str = json.dumps(output)
    rds.set(request_repr, bz2.compress(response.encode('utf-8'))) # add to cache
    return Response(response=response)
  834. @app.route('/plot/transaction', methods=['GET'])
  835. def make_weekly_plot():
  836. """
  837. Pass in `transactions` object from `/search/transaction` endpoint.
  838. We treat this as a seperate endpoint to allow for efficient repeated
  839. calls to this w/o requerying `/search/transaction`.
  840. """
  841. address: str = request.args.get('address', '')
  842. address: str = resolve_address(address, ns)
  843. address: str = address.lower()
  844. request.args = dict(request.args)
  845. request.args['address'] = address
  846. if not is_valid_address(address):
  847. return default_plot_response()
  848. window: str = request.args.get('window', '1yr')
  849. checker: PlotRequestChecker = PlotRequestChecker(request, default_window=window)
  850. is_valid_request: bool = checker.check()
  851. output: Dict[str, Any] = default_plot_response()
  852. if not is_valid_request:
  853. return Response(output)
  854. today: datetime = datetime.today()
  855. today: datetime = datetime.strptime(today.strftime('%m/%d/%Y'), '%m/%d/%Y')
  856. if window == '1mth':
  857. delta: relativedelta = relativedelta(months=1)
  858. elif window == '3mth':
  859. delta: relativedelta = relativedelta(months=3)
  860. elif window == '6mth':
  861. delta: relativedelta = relativedelta(months=6)
  862. elif window == '1yr':
  863. delta: relativedelta = relativedelta(months=12)
  864. elif window == '3yr':
  865. delta: relativedelta = relativedelta(months=12*3)
  866. elif window == '5yr':
  867. delta: relativedelta = relativedelta(months=12*5)
  868. else:
  869. raise Exception(f'Window {window} not supported.')
  870. start_date_obj: datetime = today - delta
  871. search_output: Dict[str, List[Dict[str, Any]]] = \
  872. _search_transaction(address, start_date_obj, today)
  873. transactions: List[Dict[str, Any]] = search_output['transactions']
  874. data: List[Dict[str, Any]] = []
  875. cur_start: datetime = copy.copy(start_date_obj)
  876. cur_end: datetime = cur_start + relativedelta(weeks=1)
  877. count: int = 0
  878. while cur_end <= today:
  879. counts: Dict[str, int] = {
  880. DEPO_REUSE_HEUR: 0,
  881. SAME_ADDR_HEUR: 0,
  882. GAS_PRICE_HEUR: 0,
  883. SAME_NUM_TX_HEUR: 0,
  884. LINKED_TX_HEUR: 0,
  885. TORN_MINE_HEUR: 0,
  886. }
  887. for transaction in transactions:
  888. ts: datetime = datetime.strptime(transaction['timestamp'], '%m/%d/%Y')
  889. if (ts >= cur_start) and (ts < cur_end):
  890. counts[transaction['heuristic']] += 1
  891. start_date: str = cur_start.strftime('%m/%d/%Y')
  892. end_date: str = cur_end.strftime('%m/%d/%Y')
  893. row: Dict[str, Any] = {
  894. 'index': count,
  895. 'start_date': start_date,
  896. 'end_date': end_date,
  897. 'name': f'{start_date}-{end_date}',
  898. **counts,
  899. }
  900. data.append(row)
  901. cur_start: datetime = copy.copy(cur_end)
  902. cur_end: datetime = cur_start + relativedelta(weeks=1)
  903. count += 1
  904. output['query']['window'] = window
  905. output['query']['start_time'] = start_date_obj.strftime('%m/%d/%Y')
  906. output['query']['end_time'] = today.strftime('%m/%d/%Y')
  907. output['query']['metadata']['num_points'] = len(data)
  908. output['query']['metadata']['today'] = today.strftime('%m/%d/%Y')
  909. output['data'] = data
  910. output['success'] = 1
  911. response: str = json.dumps(output)
  912. return Response(response=response)
  913. def get_relative_rank(my_stats: Dict[str, int]) -> Dict[str, Dict[str, int]]:
  914. ranks: Dict[str, Dict[str, int]] = {
  915. 'overall': 0,
  916. 'ethereum': {},
  917. 'tcash': {},
  918. 'hovers': {
  919. 'ethereum': 'percentile ranking of reveals by this address vs. other ethereum addresses',
  920. 'tcash': 'percentile ranking of reveals by this address vs. other ethereum addresses that have used Tornado Cash',
  921. }
  922. }
  923. overall: List[float] = []
  924. for heuristic in my_stats['num_ethereum']:
  925. rank: float = compute_rank(my_stats['num_ethereum'][heuristic], reveal_dists[heuristic])
  926. ranks['ethereum'][heuristic] = int(100 * rank)
  927. overall.append(rank)
  928. for heuristic in my_stats['num_ethereum']:
  929. ranks['ethereum'][heuristic] = str(ranks['ethereum'][heuristic]) + '%'
  930. for heuristic in my_stats['num_tcash']:
  931. rank: float = compute_rank(my_stats['num_tcash'][heuristic], reveal_dists[heuristic])
  932. ranks['tcash'][heuristic] = int(100 * rank)
  933. overall.append(rank)
  934. for heuristic in my_stats['num_tcash']:
  935. ranks['tcash'][heuristic] = str(ranks['tcash'][heuristic]) + '%'
  936. overall: int = int(100 * float(np.mean(overall)))
  937. ranks['overall'] = str(overall) + '%'
  938. return ranks
  939. def compute_rank(count: int, dist: Dict[int, int]) -> float:
  940. total: int = int(sum(dist.values()))
  941. bins: List[int] = sorted(list(dist.keys()))
  942. vals: List[int] = [dist[bin] for bin in bins]
  943. bins: np.array = np.array(bins)
  944. vals: np.array = np.array(vals)
  945. cdf: int = int(np.sum(vals[bins < count]))
  946. prob: float = cdf / float(total)
  947. return prob