score-updater.py

#!/usr/bin/env python3
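"""Live cricket score updater.

Scrapes the ESPNcricinfo front page for the currently featured ("key") series,
follows the live-scores RSS feed, and keeps a JSON summary of the matches in
those series. Each tracked match gets its own updater thread that polls the
site's play-by-play API; the main loop re-runs the series/RSS check roughly
every 20 seconds and copies the result to ../noball/scores.json (written via a
temporary scores.json beside this script).
"""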
import json
import os
import re
import shutil
import sys
import threading
import time

import bs4
import requests

# Set by check() once the key-series request has finished (successfully or not),
# so the main loop knows when to start the 20-second countdown to the next run.
seriesRequestComplete = False

# Featured series ids, plus live matches sorted into "major" (tracked, with an
# updater thread) and "minor" (ignored) buckets.
majorSeries = set()
majorMatches = {}      # match URL -> series id
minorMatches = set()

# scores.json is written beside this script, then copied into place and removed.
backupFileName = os.path.join(os.path.dirname(sys.argv[0]), "scores.json")
fileName = os.path.join(os.path.dirname(sys.argv[0]), "../noball/scores.json")

matchData = {}     # numeric key -> {'url': ..., 'status': ...}
rssContent = None  # most recent parse of the live-scores RSS feed
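
# check() runs once per update cycle on its own thread: it scrapes the key-series
# links from the ESPNcricinfo front page, reads the live-scores RSS feed, starts
# an updater thread for each new match that belongs to a key series, and rewrites
# the scores.json output.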
def check():
    global seriesRequestComplete
    try:
        websiteContent = requests.get("http://www.espncricinfo.com/?edition-view=espncricinfo-en-ww").text
    finally:
        # Flip the flag even if the request fails, so the main loop never blocks forever.
        seriesRequestComplete = True
    soup = bs4.BeautifulSoup(websiteContent, 'html.parser')
    majorSeries_list = []
    for link in soup.find_all("a", attrs={
            "class": "quicklinks_list__link",
            "name": lambda name: name is not None and name.startswith('&lpos=cricket:keyseries:') and not name.endswith(':app')
            }):
        try:
            majorSeries_list.append(re.match('/series/_/id/([0-9]+)/', link['href']).groups()[0])
        except AttributeError:
            pass
    # Trim the scraped list: keep only the first len(list) // 2 - 3 entries.
    majorSeries_list = majorSeries_list[:-3 + len(majorSeries_list) // 2]
    global majorSeries
    majorSeries = set(majorSeries_list)
    global majorMatches
    global minorMatches
    # Iterate over a copy, since entries are removed from majorMatches inside the loop.
    for match, series in list(majorMatches.items()):
        if series not in majorSeries:
            minorMatches.add(match)
            majorMatches.pop(match)
    rssContent_local = bs4.BeautifulSoup(requests.get("http://static.espncricinfo.com/rss/livescores.xml").text, 'html.parser')
    matchUrlList = set()
    for match in rssContent_local.find_all('item'):
        url = match.guid.get_text().strip()
        matchUrlList.add(url)
        match.link['href'] = url
    global rssContent
    rssContent = rssContent_local
    curMatches = set(majorMatches.keys()).union(minorMatches)
    # Newly listed matches: classify them and spawn an updater thread for major ones.
    for match in matchUrlList - curMatches:
        try:
            seriesId, matchId = getIds(match)
        except Exception:
            minorMatches.add(match)
            continue
        if seriesId in majorSeries:
            majorMatches[match] = seriesId
            index = max(list(matchData.keys()) + [ -1 ]) + 1
            matchData[index] = { 'url': match, 'status': rssContent.find('link', href=match).parent.title.get_text() }
            threading.Thread(target=startUpdater, args=(match, matchId, seriesId, index)).start()
        else:
            minorMatches.add(match)
    # Matches that dropped out of the RSS feed are forgotten.
    for match in curMatches - matchUrlList:
        try:
            majorMatches.pop(match)
        except KeyError:
            minorMatches.remove(match)
    # Dump the scores beside the script, copy into place, then clean up.
    with open(backupFileName, 'w') as _file:
        json.dump(list(matchData.values()), _file)
    shutil.copy2(backupFileName, fileName)
    os.remove(backupFileName)
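
# Resolve a match's RSS guid URL (following redirects) and pull the numeric
# series and match ids out of the final URL (.../series/<seriesId>/<slug>/<matchId>).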
def getIds(match):
    return re.search("/series/([0-9]+)/[^/]+/([0-9]+)", requests.get(match).url).groups()
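
# One startUpdater() thread runs per tracked match. While the match is live (the
# RSS title carries a " *" marker), it polls the play-by-play API, rebuilds the
# score line from the latest ball, and stores it in matchData; the thread exits
# once check() drops the match from majorMatches.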
def startUpdater(matchUrl, matchId, seriesId, key):
    page = 1
    while True:
        try:
            matchSummary = rssContent.find('link', href=matchUrl).parent.title.get_text()
            # Only rebuild the score while the RSS title carries the live " *" marker.
            if re.search(" [*]( |$)", matchSummary) and not re.search(" [&] ( |$)", matchSummary):
                apiLink = "http://site.api.espn.com/apis/site/v2/sports/cricket/{}/playbyplay".format(seriesId)
                innings = 1
                ballByBallData = [ [] ]
                cgiParams = { "contentorigin": "espn",
                              "lang": "en",
                              "event": matchId,
                              "page": page
                            }
                commentaryData = requests.get(apiLink, cgiParams).json()['commentary']
                pageCount = commentaryData['pageCount']
                # Jump to the last commentary page so the most recent ball is available.
                if pageCount > 1:
                    page = pageCount
                    cgiParams['page'] = page
                    commentaryData = requests.get(apiLink, cgiParams).json()['commentary']
                lastBall = next(ball for ball in reversed(commentaryData['items']) if 'overs' in ball['over'])
                innings = lastBall['innings']
                runs = innings['runs']
                wickets = innings['wickets']
                target = innings['target']
                over = lastBall['over']
                limit = over['limit']
                batsman = lastBall['batsman']
                otherBatsman = lastBall['otherBatsman']
                bowler = lastBall['bowler']
                # e.g. "123/4 (25.3/50 ov, target 250) (Batter 45*, Other 12*, Bowler 2/34)"
                score = "{}{} ({}{} ov{}) ({}{} {}*, {} {}/{})".format(
                    runs,
                    "/{}".format(wickets) if wickets < 10 else "",
                    over['overs'],
                    "/{:.0f}".format(limit) if limit > 0 else "",
                    ", target {}".format(target) if target > 0 else "",
                    "{} {}*, ".format(batsman['athlete']['displayName'], batsman['totalRuns']) if not lastBall['dismissal']['dismissal'] else "",
                    otherBatsman['athlete']['displayName'],
                    otherBatsman['totalRuns'],
                    bowler['athlete']['displayName'],
                    bowler['wickets'],
                    bowler['conceded'])
                ballsRemaining = innings['remainingBalls']
                battingTeam = lastBall['team']['displayName']
                bowlingTeam = bowler['team']['displayName']
                result = None
                if target > 0:
                    remaining = " (with {} remaining)".format(
                        "{} ov".format(innings['remainingOvers']) if ballsRemaining > 90
                        else "{} balls".format(ballsRemaining)) if limit > 0 else ""
                    if runs >= target:
                        result = "{} won by {} wickets{}".format(battingTeam, 10 - wickets, remaining)
                    elif ballsRemaining == 0 or wickets == 10:
                        if runs == target - 1:
                            result = "Match tied"
                        else:
                            result = "{} won by {} runs".format(bowlingTeam, target - runs - 1)
                    else:
                        result = "{} need {} runs to win{}".format(battingTeam, target - runs, remaining)
                # Swap the short "<runs>/<wkts> *" score in the RSS title for the detailed line.
                matchSummary = re.sub("[^ ]+ +[*]", score.replace('\\', '\\\\'), matchSummary)
                if result:
                    matchSummary += " - " + result
            matchData[key]['status'] = matchSummary
        except Exception as e:
            print("{} - {}: {}".format(time.time(), type(e), e))
        finally:
            time.sleep(10)
            # Stop once check() no longer tracks this match.
            if matchUrl not in majorMatches:
                matchData.pop(key)
                break
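
# Main loop: start a check() cycle, wait until the key-series request has at
# least been attempted, then schedule the next cycle 20 seconds later (or
# immediately, if the full check takes longer than that).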
while True:
    checkThread = threading.Thread(target=check, args=[])
    checkThread.start()
    while not seriesRequestComplete:
        time.sleep(.01)
    seriesRequestComplete = False
    nextThreadStart = time.time() + 20
    checkThread.join()
    time.sleep(max(0, nextThreadStart - time.time()))