12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124 |
- import bz2
- import math
- import json
- import copy
- import numpy as np
- import pandas as pd
- from datetime import datetime
- from dateutil.relativedelta import relativedelta
- from typing import Dict, Optional, List, Any, Set
- from app import app, w3, ns, rds, known_addresses, tornado_pools, reveal_dists
- from app.models import \
- Address, ExactMatch, GasPrice, MultiDenom, LinkedTransaction, TornMining, \
- TornadoDeposit, TornadoWithdraw, Embedding, DepositTransaction
- from app.utils import \
- get_anonymity_score, get_order_command, \
- entity_to_int, entity_to_str, to_dict, conf_to_label, \
- heuristic_to_str, is_valid_address, get_today_date_str, \
- is_tornado_address, get_equal_user_deposit_txs, find_reveals, \
- AddressRequestChecker, TornadoPoolRequestChecker, \
- TransactionRequestChecker, PlotRequestChecker, \
- default_address_response, default_tornado_response, \
- default_transaction_response, default_plot_response, \
- NAME_COL, ENTITY_COL, CONF_COL, EOA, DEPOSIT, EXCHANGE, NODE, \
- GAS_PRICE_HEUR, DEPO_REUSE_HEUR, DIFF2VEC_HEUR, SAME_NUM_TX_HEUR, \
- SAME_ADDR_HEUR, LINKED_TX_HEUR, TORN_MINE_HEUR, DIFF2VEC_HEUR
- from app.lib.w3 import query_web3, get_ens_name, resolve_address
- from flask import request, Request, Response
- from flask import render_template
- from sqlalchemy import or_
- from app.utils import get_known_attrs, get_display_aliases
# Default page size handed to the request checkers (`default_limit`).
PAGE_LIMIT = 50
# Row cap applied via `.limit(HARD_MAX)` before counting/fetching cluster rows.
HARD_MAX: int = 1000
@app.route('/', methods=['GET'])
@app.route('/index', methods=['GET'])
@app.route('/cluster', methods=['GET'])
@app.route('/transactions', methods=['GET'])
def index():
    """Render `index.html`; all four routes above share this one template."""
    page = render_template('index.html')
    return page
@app.route('/utils/aliases', methods=['GET'])
def alias():
    """Return the result of `get_display_aliases()` as a JSON response body."""
    return Response(response=json.dumps(get_display_aliases()))
@app.route('/utils/istornado', methods=['GET'])
def istornado():
    """
    Report whether the `address` query argument is a Tornado Cash pool.

    The address is resolved through `resolve_address` and lower-cased. When
    it fails `is_valid_address`, the default payload is returned with
    `success = 0`. Otherwise the payload carries `is_tornado` (0 or 1) and,
    for pools, the `amount` and `currency` parsed from the pool's `tags`
    (`amount`/`currency` are None for non-pool addresses).
    """
    address: str = request.args.get('address', '')
    address = resolve_address(address, ns)
    address = address.lower()

    output: Dict[str, Any] = {
        'data': {
            'address': address,
            'is_tornado': 1,
            'amount': 0,
            'currency': '',
        },
        'success': 0,
    }

    if not is_valid_address(address):
        return Response(json.dumps(output))

    is_tornado: int = int(is_tornado_address(address))

    amount = None
    currency = None
    if is_tornado:
        pool = tornado_pools[tornado_pools.address == address].iloc[0]
        raw_amount, currency = pool.tags.strip().split()
        # Parse as float so fractional denominations (e.g. "0.1") do not
        # raise, matching `search_tornado`; collapse whole numbers back to
        # int so existing integral responses are unchanged.
        amount = float(raw_amount)
        if amount.is_integer():
            amount = int(amount)

    output['data']['is_tornado'] = is_tornado
    output['data']['amount'] = amount
    output['data']['currency'] = currency
    output['success'] = 1

    response: str = json.dumps(output)
    return Response(response)
@app.route('/utils/gettornadopools', methods=['GET'])
def get_tornado_pools():
    """List every row of `tornado_pools` as an `{address, name}` pair."""
    listing = [
        {'address': row.address, 'name': row.tags}
        for _, row in tornado_pools.iterrows()
    ]
    payload: Dict[str, Any] = {'data': {'pools': listing}, 'success': 1}
    return Response(json.dumps(payload))
@app.route('/search', methods=['GET'])
def search():
    """
    Entry point for address searches.

    Resolves and lower-cases the `address` argument, rejects invalid
    addresses with the default address response, and otherwise dispatches to
    `search_tornado` (address is a Tornado pool) or `search_address` (any
    other address), passing along the request with the normalized address.
    """
    raw: str = request.args.get('address', '')
    # once resolved, the value is expected to be a plain address
    resolved: str = resolve_address(raw, ns).lower()

    # cheap validity check before doing any real work
    if not is_valid_address(resolved):
        return default_address_response()

    pool_mode: bool = is_tornado_address(resolved)

    # make the args mutable and store the normalized address on the request
    request.args = dict(request.args)
    request.args['address'] = resolved

    # MODE #1: Tornado pool -> compromise info from the heuristics.
    # MODE #2: regular address -> cluster lookup and complementary info.
    handler = search_tornado if pool_mode else search_address
    return handler(request)
@app.route('/search/compromised', methods=['GET'])
def haveibeencompromised():
    """
    List the deposits an address made to one Tornado pool that appear in
    any of the five reveal tables.

    Query arguments: `address` (resolved through `resolve_address`) and
    `pool` (the Tornado pool address). Both are lower-cased before use, as
    the other endpoints in this module do, so mixed-case input does not
    silently match zero rows. Returns a JSON payload with `compromised`
    (one `{heuristic, transaction}` entry per reveal) and
    `compromised_size`; `success` stays 0 when either address is invalid.
    """
    address: str = request.args.get('address', '')
    pool: str = request.args.get('pool', '')  # tornado pool address
    address = resolve_address(address, ns).lower()
    pool = pool.lower()

    output: Dict[str, Any] = {
        'data': {
            'address': address,
            'pool': pool,
            'compromised_size': 0,
            'compromised': [],
        },
        'success': 0,
    }

    if not is_valid_address(address) or not is_valid_address(pool):
        return Response(json.dumps(output))

    # find all the deposit transactions made by user for this pool
    deposits: List[TornadoDeposit] = TornadoDeposit.query.filter_by(
        from_address=address,
        tornado_cash_address=pool,
    ).all()
    deposit_txs: Set[str] = {d.hash for d in deposits}

    # (heuristic id understood by `heuristic_to_str`, reveal table), in the
    # order the entries are reported
    reveal_tables = [
        (1, ExactMatch),
        (2, GasPrice),
        (3, MultiDenom),
        (4, LinkedTransaction),
        (5, TornMining),
    ]

    compromised: List[Dict[str, Any]] = []
    for heuristic_id, table in reveal_tables:
        compromised.extend(
            {'heuristic': heuristic_to_str(heuristic_id), 'transaction': tx}
            for tx in find_reveals(deposit_txs, table)
        )

    output['data']['compromised'] = compromised
    output['data']['compromised_size'] = len(compromised)
    output['success'] = 1

    response: str = json.dumps(output)
    return Response(response)
def query_diff2vec(node: Optional[Embedding], address) -> tuple:
    """
    Build the Diff2Vec neighbor cluster for an address.

    Args:
        node: Embedding row for the address, or None when there is none.
            Its `neighbors` and `distances` attributes are JSON-encoded
            lists.
        address: The queried address; a neighbor equal to it is skipped.

    Returns:
        A `(cluster, cluster_size, cluster_conf)` tuple: the list of member
        dicts, its length, and the mean of the members' rounded
        confidences. An empty cluster yields `([], 0, 0.0)` instead of
        dividing by zero.
    """
    cluster: List[Dict[str, Any]] = []
    conf_total: float = 0.0

    if node is not None:
        neighbors: List[Any] = json.loads(node.neighbors)
        distances: List[Any] = json.loads(node.distances)

        for neighbor, distance in zip(neighbors, distances):
            # swap terms b/c of upload accident
            neighbor, distance = distance, neighbor
            if neighbor == address:
                continue  # skip the queried address itself

            # maps distance 0 -> confidence 1, decaying as distance grows
            cur_conf: float = float(1. / abs(10. * distance + 1.))
            member: Dict[str, Any] = {
                'address': neighbor,
                'conf': round(cur_conf, 3),
                'conf_label': conf_to_label(cur_conf),
                'heuristic': DIFF2VEC_HEUR,
                'entity': NODE,
                'ens_name': get_ens_name(neighbor, ns),
            }
            cluster.append(member)
            conf_total += member['conf']

    cluster_size: int = len(cluster)
    # Guard the average: no node, or only self-neighbors, gives no members.
    cluster_conf: float = conf_total / float(cluster_size) if cluster_size else 0.0

    return cluster, cluster_size, cluster_conf
def compute_anonymity_score(
    addr: Optional[Address],
    ens_name: Optional[str] = None,
    exchange_weight: float = 0.1,
    slope: float = 0.1,
    extra_cluster_sizes: Optional[List[int]] = None,
    extra_cluster_confs: Optional[List[float]] = None,
) -> float:
    """
    Compute the anonymity score of an address.

    Only EOA addresses have an anonymity score. For an exchange the score is
    0; for a deposit address it is -1, which represents N/A.

    For EOA addresses, the score is driven by the confidence and number of
    other EOA addresses in the same user cluster, plus the number of
    distinct exchange clusters reached through deposit addresses in that
    cluster. Exchange interactions are discounted by `exchange_weight`.

    Args:
        addr: Row from the Address table, or None, in which case only the
            `extra_cluster_*` seeds are scored.
        ens_name: If it contains '.eth', the score is capped at 0.90.
        exchange_weight: Discount applied to the exchange-based confidence.
        slope: Forwarded to `get_anonymity_score`.
        extra_cluster_sizes: Additional cluster sizes to seed the score.
        extra_cluster_confs: Confidences paired with `extra_cluster_sizes`.

    Returns:
        The anonymity score, or -1 / 0 for deposit / exchange addresses.

    Raises:
        AssertionError: If `addr` has an entity other than deposit,
            exchange or EOA.
    """
    # Copy the seeds: the lists are appended to below, and must not leak
    # into the caller's lists or persist between calls.
    cluster_sizes: List[float] = list(extra_cluster_sizes or [])
    cluster_confs: List[float] = list(extra_cluster_confs or [])

    if addr is not None:
        if addr.entity == entity_to_int(DEPOSIT):
            return -1  # represents N/A
        elif addr.entity == entity_to_int(EXCHANGE):
            return 0  # CEX have no anonymity

        assert addr.entity == entity_to_int(EOA), \
            f'Unknown entity: {entity_to_str(addr.entity)}'

        if addr.user_cluster is not None:
            # count EOA addresses with the same `user_cluster`
            num_cluster: int = Address.query.filter(
                Address.user_cluster == addr.user_cluster,
                Address.entity == entity_to_int(EOA),
            ).limit(HARD_MAX).count()
            cluster_confs.append(addr.conf)
            cluster_sizes.append(num_cluster)

            # find all DEPOSIT addresses with the same `user_cluster`; the
            # distinct exchange clusters they belong to are what is counted
            deposits: List[Address] = Address.query.filter(
                Address.user_cluster == addr.user_cluster,
                Address.entity == entity_to_int(DEPOSIT),
            ).limit(HARD_MAX).all()
            exchanges: Set[str] = {
                deposit.exchange_cluster for deposit in deposits}
            cluster_confs.append(addr.conf * exchange_weight)
            cluster_sizes.append(len(exchanges))

    score: float = get_anonymity_score(
        np.array(cluster_confs), np.array(cluster_sizes), slope=slope)

    if ens_name is not None:
        if len(ens_name) > 0 and '.eth' in ens_name:
            # having an ENS name caps your maximum anonymity score
            score = min(score, 0.90)

    return score
def query_heuristic(address: str, class_: Any) -> Set[str]:
    """
    Collect the transactions of every heuristic cluster that contains a row
    for `address`. `class_` is the heuristic's table class.

    Returns an empty set when the address has no rows in the table.
    """
    own_rows = class_.query.filter_by(address=address).all()
    if len(own_rows) == 0:
        return set()

    # de-duplicated cluster ids the address belongs to
    cluster_ids = list({row.cluster for row in own_rows})
    members = class_.query.filter(class_.cluster.in_(cluster_ids)).all()

    # a set, so repeated transactions collapse
    return {row.transaction for row in members}
def query_tornado_stats(address: str) -> Dict[str, Any]:
    """
    Summarize a user address's Tornado Cash activity:

    1) number of distinct deposit transactions sent from the address,
    2) number of distinct withdraw transactions received by the address,
    3) how many of those transactions appear in a reveal cluster, overall
       and per heuristic, plus hover texts describing each figure.
    """
    # cluster transactions per reveal heuristic
    exact_match_txs: Set[str] = query_heuristic(address, ExactMatch)
    gas_price_txs: Set[str] = query_heuristic(address, GasPrice)
    multi_denom_txs: Set[str] = query_heuristic(address, MultiDenom)
    linked_txs: Set[str] = query_heuristic(address, LinkedTransaction)
    torn_mine_txs: Set[str] = query_heuristic(address, TornMining)
    reveal_txs: Set[str] = (
        exact_match_txs | gas_price_txs | multi_denom_txs
        | linked_txs | torn_mine_txs
    )

    # deposits sent by the user
    deposit_rows = TornadoDeposit.query.filter_by(from_address=address).all()
    deposit_txs: Set[str] = {row.hash for row in deposit_rows}
    num_deposit: int = len(deposit_txs)

    # withdraws received by the user
    withdraw_rows = \
        TornadoWithdraw.query.filter_by(recipient_address=address).all()
    withdraw_txs: Set[str] = {row.hash for row in withdraw_rows}
    num_withdraw: int = len(withdraw_txs)

    all_txs: Set[str] = deposit_txs | withdraw_txs
    num_all: int = num_deposit + num_withdraw

    def count_flagged(flagged: Set[str]) -> int:
        # total minus the transactions NOT flagged by this reveal set
        return num_all - len(all_txs - flagged)

    num_compromised: int = count_flagged(reveal_txs)

    compromised_summary: Dict[str, Any] = {
        'all_reveals': num_compromised,
        'num_compromised_exact_match': count_flagged(exact_match_txs),
        'num_compromised_gas_price': count_flagged(gas_price_txs),
        'num_compromised_multi_denom': count_flagged(multi_denom_txs),
        'num_compromised_linked_tx': count_flagged(linked_txs),
        'num_compromised_torn_mine': count_flagged(torn_mine_txs),
        'hovers': {
            'num_compromised_exact_match': '# of deposits to/withdrawals from tornado cash pools linked through the address match heuristic. Address match links transactions if a unique address deposits and withdraws to a Tornado Cash pool.',
            'num_compromised_gas_price': '# of deposits to/withdrawals from tornado cash pools linked through the unique gas price heuristic. Unique gas price links deposit and withdrawal transactions that use a unique and specific (e.g. 3.1415) gas price.',
            'num_compromised_multi_denom': '# of deposit/withdrawals into tornado cash pools linked through the multi-denomination reveal. Multi-denomination reveal is when a “source” wallet mixes a specific set of denominations and your “destination” wallet withdraws them all. For example, if you mix 3x 10 ETH, 2x 1 ETH, 1x 0.1 ETH to get 32.1 ETH, you could reveal yourself within the Tornado protocol if no other wallet has mixed this exact denomination set.',
            'num_compromised_linked_tx': '# of deposits to/withdrawals from tornado cash pools linked through the linked address reveal. Linked address reveal connects wallets that interact outside of Tornado Cash.',
            'num_compromised_torn_mine': '# of deposits to/withdrawals from tornado cash pools linked through the TORN mining reveal. Careless swapping of Anonymity Points to TORN tokens reveal information of when deposits were made.',
        },
    }

    return {
        'num_deposit': num_deposit,
        'num_withdraw': num_withdraw,
        'num_compromised': compromised_summary,
        'num_uncompromised': num_all - num_compromised,
        'hovers': {
            'num_deposit': '# of deposit transactions into tornado cash pools.',
            'num_withdraw': '# of withdrawal transactions from tornado cash pools.',
            'num_compromised': '# of deposits to/withdrawals from tornado cash pools that may be linked through the mis-use of Tornado cash.',
            'num_uncompromised': '# of deposits to/withdrawals from tornado cash pools that are not potentially linked by the five reveals',
        },
    }
def _cluster_page(
    cluster_filter: Any,
    filter_by: List[Any],
    sort_by: str,
    desc_sort: str,
    offset: int,
    size: int,
    table_cols: Set[str],
) -> tuple:
    """Fetch one sorted page of Address rows matching `cluster_filter` (as dicts) plus the capped total count."""
    order_command: Any = get_order_command(sort_by, desc_sort)
    query_: Any = Address.query.filter(cluster_filter, *filter_by)
    rows: List[Address] = query_ \
        .order_by(order_command) \
        .offset(offset).limit(size).all()
    members: List[Dict[str, Any]] = [
        to_dict(
            c,
            table_cols,
            to_add={'ens_name': get_ens_name(c.address, ns)},
            to_remove=['id'],
            to_transform=[
                ('entity', entity_to_str),
                ('heuristic', heuristic_to_str),
            ],
        )
        for c in rows
    ]
    # total number of elements matching the filters (capped at HARD_MAX)
    total: int = query_.limit(HARD_MAX).count()
    return members, total


def _anonymity_seeds(
    graph_size: int,
    graph_conf: float,
    tornado_dict: Dict[str, Any],
) -> tuple:
    """Pair the diff2vec cluster and the five per-heuristic reveal counts with their confidences, as (sizes, confs)."""
    reveals: Dict[str, Any] = tornado_dict['num_compromised']
    sizes: List[int] = [
        graph_size,
        reveals['num_compromised_exact_match'],
        reveals['num_compromised_gas_price'],
        reveals['num_compromised_multi_denom'],
        reveals['num_compromised_linked_tx'],
        reveals['num_compromised_torn_mine'],
    ]
    confs: List[float] = [graph_conf, 1., 1., 0.5, 0.25, 0.25]
    return sizes, confs


def search_address(request: Request) -> Response:
    """
    Master function for serving address requests.

    Validates the request, then gathers web3 info, Tornado statistics, the
    address's cluster (from the Address table and/or Diff2Vec embeddings)
    and an anonymity score. Responses are cached in Redis (bz2-compressed)
    under the checker's string representation of the request. Even if no
    clusters are found, Tornado and basic info are still returned.

    Args:
        request: Flask request whose args carry `address` and paging,
            sorting and filtering options read via `AddressRequestChecker`.

    Returns:
        A Response whose body is the JSON-encoded payload. For an invalid
        request this is the default address response.

    Raises:
        Exception: If the address row has an entity other than EOA,
            deposit or exchange.
    """
    table_cols: Set[str] = set(Address.__table__.columns.keys())

    # Check if this is a valid request searching for an address
    checker: AddressRequestChecker = AddressRequestChecker(
        request,
        table_cols,
        entity_key=ENTITY_COL,
        conf_key=CONF_COL,
        name_key=NAME_COL,
        default_page=0,
        default_limit=PAGE_LIMIT,
    )
    is_valid_request: bool = checker.check()
    output: Dict[str, Any] = default_address_response()

    if not is_valid_request:
        # Serialize explicitly: handing the dict itself to Response would
        # make it iterate the dict and emit only its keys as the body.
        return Response(json.dumps(output))

    address: str = checker.get('address').lower()
    page: int = checker.get('page')
    size: int = checker.get('limit')
    sort_by: str = checker.get('sort_by')
    desc_sort: str = checker.get('desc_sort')
    filter_by: List[Any] = checker.get('filter_by')

    request_repr: str = checker.to_str()

    if rds.exists(request_repr):  # check if this exists in our cache
        response: str = bz2.decompress(rds.get(request_repr)).decode('utf-8')
        return Response(response=response)

    # --- fill out some of the known response fields ---
    output['data']['query']['address'] = address
    output['data']['metadata']['page'] = page
    output['data']['metadata']['limit'] = size
    for k in output['data']['metadata']['filter_by']:
        output['data']['metadata']['filter_by'][k] = checker.get(f'filter_{k}')

    if len(address) > 0:
        offset: int = page * size

        # --- check web3 for information ---
        web3_resp: Dict[str, Any] = query_web3(address, w3, ns)
        metadata_: Dict[str, Any] = output['data']['query']['metadata']
        output['data']['query']['metadata'] = {**metadata_, **web3_resp}

        # --- check tornado queries ---
        # Note that this is out of the `Address` existence check
        tornado_dict: Dict[str, Any] = query_tornado_stats(address)
        output['data']['tornado']['summary']['address'].update(tornado_dict)

        # --- search for address in DAR and Diff2Vec tables ---
        addr: Optional[Address] = \
            Address.query.filter_by(address=address).first()
        node: Optional[Embedding] = \
            Embedding.query.filter_by(address=address).first()

        # --- Case #1 : address can be found in the DAR Address table ---
        if addr is not None:
            entity: str = entity_to_str(addr.entity)
            if addr.meta_data is None:
                addr.meta_data = '{}'
            addr_metadata: Dict[str, Any] = json.loads(addr.meta_data)
            if 'ens_name' in addr_metadata:
                del addr_metadata['ens_name']  # no override
            metadata_ = output['data']['query']['metadata']
            output['data']['query']['metadata'] = {**metadata_, **addr_metadata}

            query_data: Dict[str, Any] = output['data']['query']
            output['data']['query'] = {
                **query_data,
                **to_dict(addr, table_cols, to_transform=[
                    ('entity', entity_to_str),
                    ('heuristic', heuristic_to_str),
                ]),
            }

            # EOAs and deposits are clustered by `user_cluster`, exchanges
            # by `exchange_cluster`; no cluster id means no DAR cluster.
            has_cluster: bool = False
            cluster_filter: Any = None
            if entity == EOA or entity == DEPOSIT:
                if addr.user_cluster is not None:
                    has_cluster = True
                    cluster_filter = \
                        Address.user_cluster == addr.user_cluster
            elif entity == EXCHANGE:
                if addr.exchange_cluster is not None:
                    has_cluster = True
                    cluster_filter = \
                        Address.exchange_cluster == addr.exchange_cluster
            else:
                raise Exception(f'Entity {entity} not supported.')

            # `cluster_size` reflects the active filters, not just this page
            cluster: List[Dict[str, Any]] = []
            cluster_size: int = 0
            if has_cluster:
                cluster, cluster_size = _cluster_page(
                    cluster_filter, filter_by, sort_by, desc_sort,
                    offset, size, table_cols)

            # find Diff2Vec embeddings and add to front of cluster
            diff2vec_cluster, diff2vec_size, diff2vec_conf = \
                query_diff2vec(node, address)
            cluster = diff2vec_cluster + cluster
            cluster_size += len(diff2vec_cluster)

            output['data']['cluster'] = cluster
            output['data']['metadata']['cluster_size'] = cluster_size
            output['data']['metadata']['num_pages'] = \
                int(math.ceil(cluster_size / size))

            # --- compute anonymity score, seeded with diff2vec + tcash
            # reveals ---
            seed_sizes, seed_confs = _anonymity_seeds(
                diff2vec_size, diff2vec_conf, tornado_dict)
            anon_score = compute_anonymity_score(
                addr,
                ens_name=web3_resp['ens_name'],
                extra_cluster_sizes=seed_sizes,
                extra_cluster_confs=seed_confs,
            )
            anon_score = round(anon_score, 3)  # brevity is a virtue
            output['data']['query']['anonymity_score'] = anon_score

        # --- Case #2: address is not in the DAR Address table but is
        #              in Embedding (Diff2Vec) table ---
        elif node is not None:
            cluster, cluster_size, conf = query_diff2vec(node, address)

            seed_sizes, seed_confs = _anonymity_seeds(
                cluster_size, conf, tornado_dict)
            anon_score = compute_anonymity_score(
                None,
                ens_name=web3_resp['ens_name'],
                extra_cluster_sizes=seed_sizes,
                extra_cluster_confs=seed_confs,
            )
            anon_score = round(anon_score, 3)

            output['data']['query']['anonymity_score'] = anon_score
            output['data']['query']['heuristic'] = DIFF2VEC_HEUR
            output['data']['query']['entity'] = NODE
            output['data']['query']['conf'] = round(conf, 3)
            output['data']['query']['conf_label'] = conf_to_label(conf)
            output['data']['query']['hovers'] = {
                'heuristic': 'this is the primary reveal linking the input address to addresses shown below. It will default to diff2vec, the ML algorithm.',
                'conf': 'indicates confidence (between 0 and 1) that the below addresses are linked to the input address. This is based on how many reveals and the types of reveals that the input address has committed.'
            }
            output['data']['cluster'] = cluster
            output['data']['metadata']['cluster_size'] = cluster_size
            output['data']['metadata']['num_pages'] = \
                int(math.ceil(cluster_size / size))

        # Check if we know existing information about this address
        known_lookup: Dict[str, Any] = \
            get_known_attrs(known_addresses, address)
        if len(known_lookup) > 0:
            query_metadata: Dict[str, Any] = \
                output['data']['query']['metadata']
            output['data']['query']['metadata'] = \
                {**query_metadata, **known_lookup}
            # a known address gets no anonymity
            output['data']['query']['anonymity_score'] = 0

        # if `addr` doesnt exist, then we assume no clustering
        output['success'] = 1

    response = json.dumps(output)
    rds.set(request_repr, bz2.compress(response.encode('utf-8')))  # add to cache
    return Response(response=response)
def search_tornado(request: Request) -> Response:
    """
    Serve a search whose address is known to be a Tornado pool.

    Reports how many of the pool's deposits (from
    `get_equal_user_deposit_txs`) appear in each of the five reveal tables,
    plus the pool's amount and currency parsed from its `tags`. When the
    request's `return_tx` flag is set, the deposit and reveal transaction
    lists themselves are included. Responses are cached in Redis
    (bz2-compressed) under the checker's string form of the request.

    Args:
        request: Flask request read through `TornadoPoolRequestChecker`.

    Returns:
        A Response whose body is the JSON-encoded payload. For an invalid
        request this is the default tornado response.
    """
    checker: TornadoPoolRequestChecker = TornadoPoolRequestChecker(
        request,
        default_page=0,
        default_limit=PAGE_LIMIT,
    )
    is_valid_request: bool = checker.check()
    output: Dict[str, Any] = default_tornado_response()

    if not is_valid_request:
        # Serialize explicitly: handing the dict itself to Response would
        # make it iterate the dict and emit only its keys as the body.
        return Response(json.dumps(output))

    # check if we can find in cache
    request_repr: str = checker.to_str()

    if rds.exists(request_repr):
        response: str = bz2.decompress(rds.get(request_repr)).decode('utf-8')
        return Response(response=response)

    address: str = checker.get('address').lower()
    page: int = checker.get('page')
    size: int = checker.get('limit')
    return_tx: bool = checker.get('return_tx')

    output['data']['query']['address'] = address
    output['data']['metadata']['page'] = page
    output['data']['metadata']['limit'] = size

    # first row of the pool table matching this address
    pool = tornado_pools[tornado_pools.address == address].iloc[0]

    deposit_txs: Set[str] = get_equal_user_deposit_txs(address)
    num_deposits: int = len(deposit_txs)

    exact_match_reveals: Set[str] = find_reveals(deposit_txs, ExactMatch)
    gas_price_reveals: Set[str] = find_reveals(deposit_txs, GasPrice)
    multi_denom_reveals: Set[str] = find_reveals(deposit_txs, MultiDenom)
    linked_tx_reveals: Set[str] = find_reveals(deposit_txs, LinkedTransaction)
    torn_mine_reveals: Set[str] = find_reveals(deposit_txs, TornMining)

    # a deposit flagged by several heuristics is counted once overall
    reveal_txs: Set[str] = set().union(
        exact_match_reveals, gas_price_reveals, multi_denom_reveals,
        linked_tx_reveals, torn_mine_reveals)
    num_compromised: int = len(reveal_txs)

    amount, currency = pool.tags.strip().split()
    stats: Dict[str, Any] = {
        'num_deposits': num_deposits,
        'tcash_num_compromised': {
            'all_reveals': num_compromised,
            'exact_match': len(exact_match_reveals),
            'gas_price': len(gas_price_reveals),
            'multi_denom': len(multi_denom_reveals),
            'linked_tx': len(linked_tx_reveals),
            'torn_mine': len(torn_mine_reveals),
        },
        'tcash_num_uncompromised': num_deposits - num_compromised,
        'hovers': {
            'tcash_num_uncompromised': '# of deposits to tornado cash pools that are not potentially compromised by the five reveals'
        }
    }

    if return_tx:
        output['data']['deposits'] = list(deposit_txs)
        output['data']['compromised'] = {
            'exact_match': list(exact_match_reveals),
            'gas_price': list(gas_price_reveals),
            'multi_denom': list(multi_denom_reveals),
            'linked_tx': list(linked_tx_reveals),
            'torn_mine': list(torn_mine_reveals),
        }

    output['data']['query']['metadata']['amount'] = float(amount)
    output['data']['query']['metadata']['currency'] = currency
    output['data']['query']['metadata']['stats'] = stats
    output['data']['metadata']['compromised_size'] = num_compromised
    output['success'] = 1

    response = json.dumps(output)
    rds.set(request_repr, bz2.compress(response.encode('utf-8')))
    return Response(response=response)
@app.route('/transaction', methods=['GET'])
def transaction():
    """Render the `transaction.html` template."""
    page = render_template('transaction.html')
    return page
def _search_transaction(
    address: str,
    start_date: datetime,
    end_date: datetime,
) -> Dict[str, List[Dict[str, Any]]]:
    """
    Collect every reveal row stored for `address` whose `block_ts` lies in
    the half-open interval [`start_date`, `end_date`).

    Each matching row becomes a dict with the keys `transaction`, `block`,
    `timestamp`, `heuristic` and `metadata`. Deposit-address-reuse rows
    carry `{'deposit': row.deposit}` as metadata; rows from the Tornado
    Cash heuristic tables carry an empty dict.

    Returns a dict holding the merged list under `transactions` (ordered by
    timestamp) plus one list per heuristic. The record dicts are shared
    between the merged list and the per-heuristic lists, and their
    `timestamp` is rewritten in place to a `%m/%d/%Y` string, so every list
    in the result carries string timestamps.
    """

    def collect(model: Any, heuristic: Any, with_deposit: bool = False
                ) -> List[Dict[str, Any]]:
        # One query per table, restricted to this address and date window.
        matched = model.query.filter(
            model.address == address,
            model.block_ts >= start_date,
            model.block_ts < end_date,
        ).all()
        records: List[Dict[str, Any]] = []
        for row in matched:
            records.append({
                'transaction': row.transaction,
                'block': row.block_number,
                'timestamp': row.block_ts,
                'heuristic': heuristic,
                'metadata': {'deposit': row.deposit} if with_deposit else {},
            })
        return records

    # Query order: deposit address reuse first, then the tcash heuristics.
    dar_matches = collect(DepositTransaction, DEPO_REUSE_HEUR, with_deposit=True)
    same_addr_matches = collect(ExactMatch, SAME_ADDR_HEUR)
    gas_price_matches = collect(GasPrice, GAS_PRICE_HEUR)
    same_num_tx_matches = collect(MultiDenom, SAME_NUM_TX_HEUR)
    linked_tx_matches = collect(LinkedTransaction, LINKED_TX_HEUR)
    torn_mine_matches = collect(TornMining, TORN_MINE_HEUR)

    merged: List[Dict[str, Any]] = [
        *dar_matches,
        *same_addr_matches,
        *gas_price_matches,
        *same_num_tx_matches,
        *linked_tx_matches,
        *torn_mine_matches,
    ]
    # Chronological order; the sort is stable, so ties keep merge order.
    merged.sort(key=lambda record: record['timestamp'])

    # Swap datetime objects for strings (mutates the shared record dicts).
    for record in merged:
        record['timestamp'] = record['timestamp'].strftime('%m/%d/%Y')

    return {
        'transactions': merged,
        'dar_matches': dar_matches,
        'same_addr_matches': same_addr_matches,
        'gas_price_matches': gas_price_matches,
        'same_num_tx_matches': same_num_tx_matches,
        'linked_tx_matches': linked_tx_matches,
        'torn_mine_matches': torn_mine_matches,
    }
@app.route('/search/transaction', methods=['GET'])
def search_transaction():
    """
    Handle GET `/search/transaction`: list the reveals recorded for one
    address inside a date range, with per-heuristic counts, relative ranks
    and (when the address is known) an anonymity score.

    The `address` query parameter is passed through `resolve_address` and
    lower-cased before validation. The remaining parameters (page, limit,
    start/end date) are validated by `TransactionRequestChecker`.

    Returns:
        - the untouched default payload when the address is invalid,
        - the default payload serialised as JSON when the request fails
          validation,
        - otherwise a JSON body that is also stored bz2-compressed in
          `rds` under the checker's string form of the request.

    `page` and `limit` are echoed back in the metadata; the transaction
    list itself is not sliced here.
    """
    address: str = request.args.get('address', '')
    address = resolve_address(address, ns)
    address = address.lower()

    if not is_valid_address(address):
        return default_transaction_response()

    # Hand the checker the resolved address rather than the raw input.
    request.args = dict(request.args)
    request.args['address'] = address

    checker: TransactionRequestChecker = TransactionRequestChecker(
        request,
        default_page=0,
        default_limit=PAGE_LIMIT,
        default_start_date='01/01/2013',
        default_end_date=get_today_date_str(),
    )
    is_valid_request: bool = checker.check()
    output: Dict[str, Any] = default_transaction_response()

    if not is_valid_request:
        # Serialise explicitly: handing the dict itself to `Response` makes
        # it iterate over the dict's keys instead of emitting JSON.
        return Response(response=json.dumps(output))

    address = checker.get('address').lower()
    start_date: str = checker.get('start_date')
    start_date_obj: datetime = checker.get('start_date_obj')
    end_date: str = checker.get('end_date')
    end_date_obj: datetime = checker.get('end_date_obj')
    page: int = checker.get('page')
    size: int = checker.get('limit')

    request_repr: str = checker.to_str()

    # Single round-trip cache lookup; avoids the exists-then-get race.
    # NOTE(review): assumes `rds.get` returns None for a missing key
    # (redis-py semantics) -- confirm.
    cached = rds.get(request_repr)
    if cached is not None:
        return Response(response=bz2.decompress(cached).decode('utf-8'))

    search_output: Dict[str, List[Dict[str, Any]]] = \
        _search_transaction(address, start_date_obj, end_date_obj)
    transactions: List[Dict[str, Any]] = search_output['transactions']

    stats: Dict[str, Any] = {
        'num_transactions': len(transactions),
        'num_ethereum': {
            DEPO_REUSE_HEUR: len(search_output['dar_matches']),
        },
        'num_tcash': {
            SAME_ADDR_HEUR: len(search_output['same_addr_matches']),
            GAS_PRICE_HEUR: len(search_output['gas_price_matches']),
            SAME_NUM_TX_HEUR: len(search_output['same_num_tx_matches']),
            LINKED_TX_HEUR: len(search_output['linked_tx_matches']),
            TORN_MINE_HEUR: len(search_output['torn_mine_matches']),
        },
    }

    # Ranks must be computed before the `hovers` entries are attached:
    # `get_relative_rank` iterates every key of `num_ethereum`/`num_tcash`.
    ranks: Dict[str, Any] = get_relative_rank(stats)

    stats['hovers'] = {
        'num_transactions': 'Number of transaction reveals involving this Ethereum address',
        'num_ethereum': 'Number of Ethereum transaction reveals based on the Deposit Address Reuse Reveal',
        'num_tcash': 'Number of reveals by this address using Tornado Cash'
    }
    stats['num_tcash']['hovers'] = {
        SAME_ADDR_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the address match heuristic. Address match links transactions if a unique address deposits and withdraws to a Tornado Cash pool.',
        GAS_PRICE_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the unique gas price heuristic. Unique gas price links deposit and withdrawal transactions that use a unique and specific (e.g. 3.1415) gas price.',
        SAME_NUM_TX_HEUR: '# of deposit/withdrawals into tornado cash pools linked through the multi-denomination reveal. Multi-denomination reveal is when a “source” wallet mixes a specific set of denominations and your “destination” wallet withdraws them all. For example, if you mix 3x 10 ETH, 2x 1 ETH, 1x 0.1 ETH to get 32.1 ETH, you could reveal yourself within the Tornado protocol if no other wallet has mixed this exact denomination set.',
        LINKED_TX_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the linked address reveal. Linked address reveal connects wallets that interact outside of Tornado Cash.',
        TORN_MINE_HEUR: '# of deposits to/withdrawals from tornado cash pools linked through the TORN mining reveal. Careless swapping of Anonymity Points to TORN tokens reveal information of when deposits were made.',
    }
    # NOTE(review): the key here is the literal string 'DEPO_REUSE_HEUR'
    # (this was `dict(DEPO_REUSE_HEUR=...)`), unlike the tcash hovers above
    # which are keyed by the constants' values. Kept as-is because the
    # consumer of this payload is not visible here -- confirm which key it
    # expects.
    stats['num_ethereum']['hovers'] = {
        'DEPO_REUSE_HEUR': 'when two user addresses send to the same centralized exchange deposit address, they are linked by the deposit address reuse heuristic',
    }

    web3_resp: Dict[str, Any] = query_web3(address, w3, ns)

    addr: Optional[Address] = Address.query.filter_by(address=address).first()
    node: Optional[Embedding] = Embedding.query.filter_by(address=address).first()

    # The score is only filled in when the address appears in at least one
    # of the two tables; otherwise the default payload value is left alone.
    if addr is not None or node is not None:
        _, diff2vec_size, diff2vec_conf = query_diff2vec(node, address)
        tornado_dict: Dict[str, Any] = query_tornado_stats(address)
        num_compromised: Dict[str, Any] = tornado_dict['num_compromised']

        anon_score = compute_anonymity_score(
            addr,
            ens_name=web3_resp['ens_name'],
            # seed computing anonymity score with diff2vec + tcash reveals
            extra_cluster_sizes=[
                diff2vec_size,
                num_compromised['num_compromised_exact_match'],
                num_compromised['num_compromised_gas_price'],
                num_compromised['num_compromised_multi_denom'],
                num_compromised['num_compromised_linked_tx'],
                num_compromised['num_compromised_torn_mine'],
            ],
            # confidences, position-matched with the sizes above
            extra_cluster_confs=[
                diff2vec_conf,
                1.,
                1.,
                0.5,
                0.25,
                0.25,
            ],
        )
        anon_score = round(anon_score, 3)  # brevity is a virtue
        output['data']['query']['anonymity_score'] = anon_score

    # --

    output['data']['query']['address'] = address
    output['data']['query']['start_date'] = start_date
    output['data']['query']['end_date'] = end_date
    output['data']['metadata']['page'] = page
    output['data']['metadata']['limit'] = size
    output['data']['query']['metadata']['stats'] = stats
    output['data']['query']['metadata']['ranks'] = ranks
    output['data']['transactions'] = transactions
    output['success'] = 1

    response: str = json.dumps(output)
    rds.set(request_repr, bz2.compress(response.encode('utf-8')))  # add to cache
    return Response(response=response)
@app.route('/plot/transaction', methods=['GET'])
def make_weekly_plot():
    """
    Handle GET `/plot/transaction`: weekly reveal counts for one address.

    Query parameters:
        address: passed through `resolve_address` and lower-cased before
            validation.
        window: one of `1mth`, `3mth`, `6mth`, `1yr` (default), `3yr`,
            `5yr`; how far back from today the plot starts.

    The reveals are fetched here with `_search_transaction`. This is a
    separate endpoint from `/search/transaction` so the plot can be
    requested repeatedly without re-running that search.

    Only whole weeks that end on or before today are emitted. Each data
    row holds `index`, `start_date`, `end_date`, `name` and one counter
    per heuristic.

    Raises:
        Exception: if `window` is not one of the supported values.
    """
    address: str = request.args.get('address', '')
    address = resolve_address(address, ns)
    address = address.lower()

    # Hand the checker the resolved address rather than the raw input.
    request.args = dict(request.args)
    request.args['address'] = address

    if not is_valid_address(address):
        return default_plot_response()

    window: str = request.args.get('window', '1yr')
    checker: PlotRequestChecker = PlotRequestChecker(request, default_window=window)
    is_valid_request: bool = checker.check()
    output: Dict[str, Any] = default_plot_response()

    if not is_valid_request:
        # Serialise explicitly: handing the dict itself to `Response` makes
        # it iterate over the dict's keys instead of emitting JSON.
        return Response(response=json.dumps(output))

    # Midnight of the current day.
    today: datetime = datetime.today().replace(
        hour=0, minute=0, second=0, microsecond=0)

    # Supported windows, expressed in months.
    window_months: Dict[str, int] = {
        '1mth': 1,
        '3mth': 3,
        '6mth': 6,
        '1yr': 12,
        '3yr': 12 * 3,
        '5yr': 12 * 5,
    }
    if window not in window_months:
        raise Exception(f'Window {window} not supported.')
    delta: relativedelta = relativedelta(months=window_months[window])

    start_date_obj: datetime = today - delta

    search_output: Dict[str, List[Dict[str, Any]]] = \
        _search_transaction(address, start_date_obj, today)
    transactions: List[Dict[str, Any]] = search_output['transactions']

    # Parse each timestamp once up front instead of once per weekly bucket.
    stamped = [
        (datetime.strptime(tx['timestamp'], '%m/%d/%Y'), tx['heuristic'])
        for tx in transactions
    ]

    one_week: relativedelta = relativedelta(weeks=1)
    data: List[Dict[str, Any]] = []
    cur_start: datetime = start_date_obj
    cur_end: datetime = cur_start + one_week

    while cur_end <= today:
        counts: Dict[Any, int] = {
            DEPO_REUSE_HEUR: 0,
            SAME_ADDR_HEUR: 0,
            GAS_PRICE_HEUR: 0,
            SAME_NUM_TX_HEUR: 0,
            LINKED_TX_HEUR: 0,
            TORN_MINE_HEUR: 0,
        }
        for ts, heuristic in stamped:
            # Buckets are half-open: [cur_start, cur_end).
            if cur_start <= ts < cur_end:
                counts[heuristic] += 1

        start_date: str = cur_start.strftime('%m/%d/%Y')
        end_date: str = cur_end.strftime('%m/%d/%Y')
        data.append({
            'index': len(data),
            'start_date': start_date,
            'end_date': end_date,
            'name': f'{start_date}-{end_date}',
            **counts,
        })

        cur_start = cur_end
        cur_end = cur_start + one_week

    output['query']['window'] = window
    output['query']['start_time'] = start_date_obj.strftime('%m/%d/%Y')
    output['query']['end_time'] = today.strftime('%m/%d/%Y')
    output['query']['metadata']['num_points'] = len(data)
    output['query']['metadata']['today'] = today.strftime('%m/%d/%Y')

    output['data'] = data
    output['success'] = 1

    response: str = json.dumps(output)
    return Response(response=response)
def get_relative_rank(my_stats: Dict[str, int]) -> Dict[str, Dict[str, int]]:
    """
    Turn per-heuristic reveal counts into percentage strings.

    `my_stats` is read at the keys `num_ethereum` and `num_tcash`, each a
    mapping from heuristic to reveal count. Every count is scored with
    `compute_rank` against `reveal_dists[heuristic]`; the resulting
    fraction is truncated to a whole percent and rendered as e.g. `'42%'`.

    Returns a dict with `overall` (mean of all fractions, as a percent
    string), `ethereum` and `tcash` (heuristic -> percent string) and
    `hovers` (fixed explanatory texts).
    """
    ethereum_ranks: Dict[Any, str] = {}
    tcash_ranks: Dict[Any, str] = {}
    fractions: List[float] = []

    # Ethereum heuristics are scored first, then the Tornado Cash ones.
    for stats_key, target in (('num_ethereum', ethereum_ranks),
                              ('num_tcash', tcash_ranks)):
        for heuristic, num_reveals in my_stats[stats_key].items():
            fraction: float = compute_rank(num_reveals, reveal_dists[heuristic])
            target[heuristic] = str(int(100 * fraction)) + '%'
            fractions.append(fraction)

    overall_percent: int = int(100 * float(np.mean(fractions)))

    return {
        'overall': str(overall_percent) + '%',
        'ethereum': ethereum_ranks,
        'tcash': tcash_ranks,
        'hovers': {
            'ethereum': 'percentile ranking of reveals by this address vs. other ethereum addresses',
            'tcash': 'percentile ranking of reveals by this address vs. other ethereum addresses that have used Tornado Cash',
        },
    }
def compute_rank(count: int, dist: Dict[int, int]) -> float:
    """
    Return the fraction of the distribution that lies strictly below `count`.

    Args:
        count: the value to rank.
        dist: histogram mapping a value to how many times it was observed.

    Returns:
        Sum of the frequencies whose value is `< count`, divided by the sum
        of all frequencies. An empty histogram, or one whose frequencies
        sum to zero, yields 0.0 rather than a division by zero.
    """
    total: int = int(sum(dist.values()))
    if total == 0:
        # Nothing to compare against.
        return 0.0

    below: int = int(sum(freq for value, freq in dist.items() if value < count))
    return below / float(total)
|