comments.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. from youtube import proto, util, yt_data_extract
  2. from youtube.util import (
  3. concat_or_none,
  4. strip_non_ascii
  5. )
  6. from youtube import yt_app
  7. import settings
  8. import json
  9. import base64
  10. import flask
  11. from flask import request
  12. # Here's what I know about the secret key (starting with ASJN_i)
  13. # *The secret key definitely contains the following information (or perhaps the information is stored at youtube's servers):
  14. # -Video id
  15. # -Offset
  16. # -Sort
  17. # *If the video id or sort in the ctoken contradicts the ASJN, the response is an error. The offset encoded outside the ASJN is ignored entirely.
  18. # *The ASJN is base64 encoded data, indicated by the fact that the character after "ASJN_i" is one of ("0", "1", "2", "3")
  19. # *The encoded data is not valid protobuf
  20. # *The encoded data (after the 5 or so bytes that are always the same) is indistinguishable from random data according to a battery of randomness tests
  21. # *The ASJN in the ctoken provided by a response changes in regular intervals of about a second or two.
  22. # *Old ASJNs continue to work, and start at the same comment even if new comments have been posted since
  23. # *The ASJN has no relation with any of the data in the response it came from
  24. def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
  25. video_id = proto.as_bytes(video_id)
  26. secret_key = proto.as_bytes(secret_key)
  27. page_info = proto.string(4, video_id) + proto.uint(6, sort)
  28. offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
  29. if secret_key:
  30. offset_information = proto.string(1, secret_key) + offset_information
  31. page_params = proto.string(2, video_id)
  32. if lc:
  33. page_params += proto.string(6, proto.percent_b64encode(proto.string(15, lc)))
  34. result = proto.nested(2, page_params) + proto.uint(3, 6) + proto.nested(6, offset_information)
  35. return base64.urlsafe_b64encode(result).decode('ascii')
  36. def request_comments(ctoken, replies=False):
  37. url = 'https://m.youtube.com/youtubei/v1/next'
  38. url += '?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
  39. data = json.dumps({
  40. 'context': {
  41. 'client': {
  42. 'hl': 'en',
  43. 'gl': 'US',
  44. 'clientName': 'MWEB',
  45. 'clientVersion': '2.20240328.08.00',
  46. },
  47. },
  48. 'continuation': ctoken.replace('=', '%3D'),
  49. })
  50. content = util.fetch_url(
  51. url, headers=util.mobile_xhr_headers + util.json_header, data=data,
  52. report_text='Retrieved comments', debug_name='request_comments')
  53. content = content.decode('utf-8')
  54. polymer_json = json.loads(content)
  55. return polymer_json
  56. def single_comment_ctoken(video_id, comment_id):
  57. page_params = proto.string(2, video_id) + proto.string(
  58. 6, proto.percent_b64encode(proto.string(15, comment_id)))
  59. result = proto.nested(2, page_params) + proto.uint(3, 6)
  60. return base64.urlsafe_b64encode(result).decode('ascii')
  61. def post_process_comments_info(comments_info):
  62. for comment in comments_info['comments']:
  63. comment['author'] = strip_non_ascii(comment['author'])
  64. comment['author_url'] = concat_or_none(
  65. '/', comment['author_url'])
  66. comment['author_avatar'] = concat_or_none(
  67. settings.img_prefix, comment['author_avatar'])
  68. comment['permalink'] = concat_or_none(
  69. util.URL_ORIGIN, '/watch?v=',
  70. comments_info['video_id'],
  71. '&lc=', comment['id']
  72. )
  73. reply_count = comment['reply_count']
  74. comment['replies_url'] = None
  75. if comment['reply_ctoken']:
  76. # change max_replies field to 250 in ctoken
  77. ctoken = comment['reply_ctoken']
  78. ctoken, err = proto.set_protobuf_value(
  79. ctoken,
  80. 'base64p', 6, 3, 9, value=200)
  81. if err:
  82. print('Error setting ctoken value:')
  83. print(err)
  84. comment['replies_url'] = None
  85. comment['replies_url'] = concat_or_none(
  86. util.URL_ORIGIN,
  87. '/comments?replies=1&ctoken=' + ctoken)
  88. if reply_count == 0:
  89. comment['view_replies_text'] = 'Reply'
  90. elif reply_count == 1:
  91. comment['view_replies_text'] = '1 reply'
  92. else:
  93. comment['view_replies_text'] = str(reply_count) + ' replies'
  94. if comment['approx_like_count'] == '1':
  95. comment['likes_text'] = '1 like'
  96. else:
  97. comment['likes_text'] = (str(comment['approx_like_count'])
  98. + ' likes')
  99. comments_info['include_avatars'] = settings.enable_comment_avatars
  100. if comments_info['ctoken']:
  101. ctoken = comments_info['ctoken']
  102. if comments_info['is_replies']:
  103. replies_param = '&replies=1'
  104. # change max_replies field to 250 in ctoken
  105. new_ctoken, err = proto.set_protobuf_value(
  106. ctoken,
  107. 'base64p', 6, 3, 9, value=200)
  108. if err:
  109. print('Error setting ctoken value:')
  110. print(err)
  111. else:
  112. ctoken = new_ctoken
  113. else:
  114. replies_param = ''
  115. comments_info['more_comments_url'] = concat_or_none(
  116. util.URL_ORIGIN, '/comments?ctoken=', ctoken, replies_param)
  117. if comments_info['offset'] is None:
  118. comments_info['page_number'] = None
  119. else:
  120. comments_info['page_number'] = int(comments_info['offset']/20) + 1
  121. if not comments_info['is_replies']:
  122. comments_info['sort_text'] = 'top' if comments_info['sort'] == 0 else 'newest'
  123. comments_info['video_url'] = concat_or_none(
  124. util.URL_ORIGIN, '/watch?v=', comments_info['video_id'])
  125. comments_info['video_thumbnail'] = concat_or_none(
  126. settings.img_prefix, 'https://i.ytimg.com/vi/',
  127. comments_info['video_id'], '/hqdefault.jpg'
  128. )
  129. def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
  130. try:
  131. if settings.comments_mode:
  132. comments_info = {'error': None}
  133. other_sort_url = (
  134. util.URL_ORIGIN + '/comments?ctoken='
  135. + make_comment_ctoken(video_id, sort=1 - sort, lc=lc)
  136. )
  137. other_sort_text = 'Sort by ' + ('newest' if sort == 0 else 'top')
  138. this_sort_url = (util.URL_ORIGIN
  139. + '/comments?ctoken='
  140. + make_comment_ctoken(video_id, sort=sort, lc=lc))
  141. comments_info['comment_links'] = [
  142. (other_sort_text, other_sort_url),
  143. ('Direct link', this_sort_url)
  144. ]
  145. ctoken = make_comment_ctoken(video_id, sort, offset, lc)
  146. comments_info.update(yt_data_extract.extract_comments_info(
  147. request_comments(ctoken), ctoken=ctoken
  148. ))
  149. post_process_comments_info(comments_info)
  150. return comments_info
  151. else:
  152. return {}
  153. except util.FetchError as e:
  154. if e.code == '429' and settings.route_tor:
  155. comments_info['error'] = 'Error: YouTube blocked the request because the Tor exit node is overutilized.'
  156. if e.error_message:
  157. comments_info['error'] += '\n\n' + e.error_message
  158. comments_info['error'] += '\n\nExit node IP address: %s' % e.ip
  159. else:
  160. comments_info['error'] = 'YouTube blocked the request. IP address: %s' % e.ip
  161. except Exception as e:
  162. comments_info['error'] = 'YouTube blocked the request. IP address: %s' % e.ip
  163. if comments_info.get('error'):
  164. print('Error retrieving comments for ' + str(video_id) + ':\n' +
  165. comments_info['error'])
  166. return comments_info
  167. @yt_app.route('/comments')
  168. def get_comments_page():
  169. ctoken = request.args.get('ctoken', '')
  170. replies = request.args.get('replies', '0') == '1'
  171. comments_info = yt_data_extract.extract_comments_info(
  172. request_comments(ctoken, replies), ctoken=ctoken
  173. )
  174. post_process_comments_info(comments_info)
  175. if not replies:
  176. if comments_info['sort'] is None or comments_info['video_id'] is None:
  177. other_sort_url = None
  178. else:
  179. other_sort_url = (
  180. util.URL_ORIGIN
  181. + '/comments?ctoken='
  182. + make_comment_ctoken(comments_info['video_id'],
  183. sort=1-comments_info['sort'])
  184. )
  185. other_sort_text = 'Sort by ' + ('newest' if comments_info['sort'] == 0 else 'top')
  186. comments_info['comment_links'] = [(other_sort_text, other_sort_url)]
  187. return flask.render_template(
  188. 'comments_page.html',
  189. comments_info=comments_info,
  190. slim=request.args.get('slim', False)
  191. )