chingari.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. import itertools
  2. import json
  3. import urllib.parse
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. ExtractorError,
  7. clean_html,
  8. int_or_none,
  9. str_to_int,
  10. url_or_none,
  11. )
  12. class ChingariBaseIE(InfoExtractor):
  13. def _get_post(self, id, post_data):
  14. media_data = post_data['mediaLocation']
  15. base_url = media_data['base']
  16. author_data = post_data.get('authorData', {})
  17. song_data = post_data.get('song', {}) # revist this in future for differentiating b/w 'art' and 'author'
  18. formats = [{
  19. 'format_id': frmt,
  20. 'width': str_to_int(frmt[1:]),
  21. 'url': base_url + frmt_path,
  22. } for frmt, frmt_path in media_data.get('transcoded', {}).items()]
  23. if media_data.get('path'):
  24. formats.append({
  25. 'format_id': 'original',
  26. 'format_note': 'Direct video.',
  27. 'url': base_url + '/apipublic' + media_data['path'],
  28. 'quality': 10,
  29. })
  30. timestamp = str_to_int(post_data.get('created_at'))
  31. if timestamp:
  32. timestamp = int_or_none(timestamp, 1000)
  33. thumbnail, uploader_url = None, None
  34. if media_data.get('thumbnail'):
  35. thumbnail = base_url + media_data.get('thumbnail')
  36. if author_data.get('username'):
  37. uploader_url = 'https://chingari.io/' + author_data.get('username')
  38. return {
  39. 'id': id,
  40. 'extractor_key': ChingariIE.ie_key(),
  41. 'extractor': 'Chingari',
  42. 'title': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
  43. 'description': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
  44. 'duration': media_data.get('duration'),
  45. 'thumbnail': url_or_none(thumbnail),
  46. 'like_count': post_data.get('likeCount'),
  47. 'view_count': post_data.get('viewsCount'),
  48. 'comment_count': post_data.get('commentCount'),
  49. 'repost_count': post_data.get('shareCount'),
  50. 'timestamp': timestamp,
  51. 'uploader_id': post_data.get('userId') or author_data.get('_id'),
  52. 'uploader': author_data.get('name'),
  53. 'uploader_url': url_or_none(uploader_url),
  54. 'track': song_data.get('title'),
  55. 'artist': song_data.get('author'),
  56. 'formats': formats,
  57. }
  58. class ChingariIE(ChingariBaseIE):
  59. _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
  60. _TESTS = [{
  61. 'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
  62. 'info_dict': {
  63. 'id': '612f8f4ce1dc57090e8a7beb',
  64. 'ext': 'mp4',
  65. 'title': 'Happy birthday Srila Prabhupada',
  66. 'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa',
  67. 'duration': 0,
  68. 'thumbnail': 'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg',
  69. 'like_count': int,
  70. 'view_count': int,
  71. 'comment_count': int,
  72. 'repost_count': int,
  73. 'timestamp': 1630506828,
  74. 'upload_date': '20210901',
  75. 'uploader_id': '5f0403982c8bd344f4813f8c',
  76. 'uploader': 'ISKCON,Inc.',
  77. 'uploader_url': 'https://chingari.io/iskcon,inc',
  78. 'track': None,
  79. 'artist': None,
  80. },
  81. 'params': {'skip_download': True}
  82. }]
  83. def _real_extract(self, url):
  84. id = self._match_id(url)
  85. post_json = self._download_json(f'https://api.chingari.io/post/post_details/{id}', id)
  86. if post_json['code'] != 200:
  87. raise ExtractorError(post_json['message'], expected=True)
  88. post_data = post_json['data']
  89. return self._get_post(id, post_data)
  90. class ChingariUserIE(ChingariBaseIE):
  91. _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
  92. _TESTS = [{
  93. 'url': 'https://chingari.io/dada1023',
  94. 'info_dict': {
  95. 'id': 'dada1023',
  96. },
  97. 'params': {'playlistend': 3},
  98. 'playlist': [{
  99. 'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a',
  100. 'info_dict': {
  101. 'id': '614781f3ade60b3a0bfff42a',
  102. 'ext': 'mp4',
  103. 'title': '#chingaribappa ',
  104. 'description': 'md5:d1df21d84088770468fa63afe3b17857',
  105. 'duration': 7,
  106. 'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg',
  107. 'like_count': int,
  108. 'view_count': int,
  109. 'comment_count': int,
  110. 'repost_count': int,
  111. 'timestamp': 1632076275,
  112. 'upload_date': '20210919',
  113. 'uploader_id': '5efc4b12cca35c3d1794c2d3',
  114. 'uploader': 'dada (girish) dhawale',
  115. 'uploader_url': 'https://chingari.io/dada1023',
  116. 'track': None,
  117. 'artist': None
  118. },
  119. 'params': {'skip_download': True}
  120. }, {
  121. 'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba',
  122. 'info_dict': {
  123. 'id': '6146b132bcbf860959e12cba',
  124. 'ext': 'mp4',
  125. 'title': 'Tactor harvesting',
  126. 'description': 'md5:8403f12dce68828b77ecee7eb7e887b7',
  127. 'duration': 59.3,
  128. 'thumbnail': 'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg',
  129. 'like_count': int,
  130. 'view_count': int,
  131. 'comment_count': int,
  132. 'repost_count': int,
  133. 'timestamp': 1632022834,
  134. 'upload_date': '20210919',
  135. 'uploader_id': '5efc4b12cca35c3d1794c2d3',
  136. 'uploader': 'dada (girish) dhawale',
  137. 'uploader_url': 'https://chingari.io/dada1023',
  138. 'track': None,
  139. 'artist': None
  140. },
  141. 'params': {'skip_download': True}
  142. }, {
  143. 'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82',
  144. 'info_dict': {
  145. 'id': '6145651b74cb030a64c40b82',
  146. 'ext': 'mp4',
  147. 'title': '#odiabhajan ',
  148. 'description': 'md5:687ea36835b9276cf2af90f25e7654cb',
  149. 'duration': 56.67,
  150. 'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg',
  151. 'like_count': int,
  152. 'view_count': int,
  153. 'comment_count': int,
  154. 'repost_count': int,
  155. 'timestamp': 1631937819,
  156. 'upload_date': '20210918',
  157. 'uploader_id': '5efc4b12cca35c3d1794c2d3',
  158. 'uploader': 'dada (girish) dhawale',
  159. 'uploader_url': 'https://chingari.io/dada1023',
  160. 'track': None,
  161. 'artist': None
  162. },
  163. 'params': {'skip_download': True}
  164. }],
  165. }, {
  166. 'url': 'https://chingari.io/iskcon%2Cinc',
  167. 'playlist_mincount': 1025,
  168. 'info_dict': {
  169. 'id': 'iskcon%2Cinc',
  170. },
  171. }]
  172. def _entries(self, id):
  173. skip = 0
  174. has_more = True
  175. for page in itertools.count():
  176. posts = self._download_json('https://api.chingari.io/users/getPosts', id,
  177. data=json.dumps({'userId': id, 'ownerId': id, 'skip': skip, 'limit': 20}).encode(),
  178. headers={'content-type': 'application/json;charset=UTF-8'},
  179. note='Downloading page %s' % page)
  180. for post in posts.get('data', []):
  181. post_data = post['post']
  182. yield self._get_post(post_data['_id'], post_data)
  183. skip += 20
  184. has_more = posts['hasMoreData']
  185. if not has_more:
  186. break
  187. def _real_extract(self, url):
  188. alt_id = self._match_id(url)
  189. post_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id)
  190. if post_json['code'] != 200:
  191. raise ExtractorError(post_json['message'], expected=True)
  192. id = post_json['data']['_id']
  193. return self.playlist_result(self._entries(id), playlist_id=alt_id)