batchaddmedia.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. # GNU MediaGoblin -- federated, autonomous media hosting
  2. # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
  3. #
  4. # This program is free software: you can redistribute it and/or modify
  5. # it under the terms of the GNU Affero General Public License as published by
  6. # the Free Software Foundation, either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU Affero General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU Affero General Public License
  15. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. from __future__ import print_function
  17. import codecs
  18. import csv
  19. import os
  20. import requests
  21. import six
  22. from six.moves.urllib.parse import urlparse
  23. from mediagoblin.gmg_commands import util as commands_util
  24. from mediagoblin.submit.lib import (
  25. submit_media, get_upload_file_limits,
  26. FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
  27. from mediagoblin.tools.metadata import compact_and_validate
  28. from mediagoblin.tools.translate import pass_to_ugettext as _
  29. from jsonschema.exceptions import ValidationError
  def parser_setup(subparser):
      """Attach this command's help text and arguments to ``subparser``.

      Sets the parser description and a translated epilog pointing at the
      command-line upload documentation, then registers two positional
      arguments (``username``, ``metadata_path``) and the optional
      ``--celery`` flag.
      """
      subparser.description = (
          "This command allows the administrator to upload many media files "
          "at once.")
      subparser.epilog = _(
          u"For more information about how to properly run this\n"
          u"script (and how to format the metadata csv file), read the "
          u"MediaGoblin\n"
          u"documentation page on command line uploading\n"
          u"<http://docs.mediagoblin.org/siteadmin/commandline-upload.html>")

      register = subparser.add_argument

      # Positional: owner of the uploaded entries.
      username_help = _(u"Name of user these media entries belong to")
      register('username', help=username_help)

      # Positional: where the metadata CSV lives.
      metadata_help = _(
          u"Path to the csv file containing metadata information.")
      register('metadata_path', help=metadata_help)

      # Optional switch: a plain boolean flag, off by default.
      celery_help = _(u"Don't process eagerly, pass off to celery")
      register('--celery', action='store_true', help=celery_help)
  def batchaddmedia(args):
      """Submit every media file listed in a metadata CSV for a single user.

      ``args`` is the parsed command line; this function reads
      ``args.username``, ``args.metadata_path`` and ``args.celery``.

      Each CSV row must provide a ``location`` (an http/https URL, or a local
      path that is resolved relative to the CSV file when it is not
      absolute) and may provide ``title``/``dc:title``,
      ``description``/``dc:description`` and ``license``.  Rows whose
      metadata fails validation, or whose file cannot be opened, are
      reported on stdout and skipped.  A summary line with the number of
      successful submissions is printed at the end.

      Returns None.  Raises KeyError if a row has no ``location`` column.
      """
      # Run eagerly unless explicitly set not to
      if not args.celery:
          os.environ['CELERY_ALWAYS_EAGER'] = 'true'

      app = commands_util.setup_app(args)

      files_uploaded, files_attempted = 0, 0

      # get the user
      user = app.db.User.query.filter_by(username=args.username.lower()).first()
      if user is None:
          # Translate the template first and format afterwards; formatting
          # first would make the catalogue lookup miss every time.
          print(_(u"Sorry, no user by username '{username}' exists").format(
              username=args.username))
          return

      if not os.path.isfile(args.metadata_path):
          print(_(u'File at {path} not found, use -h flag for help').format(
              path=args.metadata_path))
          return

      abs_metadata_filename = os.path.abspath(args.metadata_path)
      abs_metadata_dir = os.path.dirname(abs_metadata_filename)
      upload_limit, max_file_size = get_upload_file_limits(user)

      def maybe_unicodeify(some_string):
          # Keep None as None; coerce anything else to text.
          if some_string is None:
              return None
          return six.text_type(some_string)

      with codecs.open(
              abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
          contents = all_metadata.read()
      media_metadata = parse_csv_file(contents)

      # six.iteritems works on both Python 2 and 3 (dict.iteritems is 2-only).
      for media_id, file_metadata in six.iteritems(media_metadata):
          files_attempted += 1
          original_location = file_metadata['location']

          ### Pull the important media information for mediagoblin from the
          ### metadata, if it is provided.
          title = file_metadata.get('title') or file_metadata.get('dc:title')
          description = (file_metadata.get('description') or
                         file_metadata.get('dc:description'))
          media_license = file_metadata.get('license')

          try:
              json_ld_metadata = compact_and_validate(file_metadata)
          except ValidationError as exc:
              # exc.path is empty when the error is not tied to one key.
              error_path = exc.path[0] if exc.path else u''
              print(_(
                  u"Error with media '{media_id}' value '{error_path}': "
                  u"{error_msg}\n"
                  u"Metadata was not uploaded.").format(
                      media_id=media_id,
                      error_path=error_path,
                      error_msg=exc.message))
              continue

          url = urlparse(original_location)
          # Last path component; a bare str.split() would split on
          # whitespace instead of on path separators.
          filename = os.path.basename(url.path)

          if url.scheme in ('http', 'https'):
              try:
                  res = requests.get(url.geturl(), stream=True)
              except requests.exceptions.RequestException:
                  print(_(
                      u"FAIL: Remote file {filename} could not be accessed.\n"
                      u"{filename} will not be uploaded.").format(
                          filename=filename))
                  continue
              media_file = res.raw
          elif url.scheme == '':
              path = url.path
              if os.path.isabs(path):
                  file_abs_path = os.path.abspath(path)
              else:
                  # Relative locations are relative to the CSV file itself.
                  file_path = os.path.join(abs_metadata_dir, path)
                  file_abs_path = os.path.abspath(file_path)
              try:
                  # Media is binary data; open() also exists on Python 3,
                  # unlike the file() builtin.
                  media_file = open(file_abs_path, 'rb')
              except IOError:
                  print(_(
                      u"FAIL: Local file {filename} could not be accessed.\n"
                      u"{filename} will not be uploaded.").format(
                          filename=filename))
                  continue
          else:
              # Without this branch media_file would be unbound, or would
              # silently still point at the previous row's file.
              print(_(
                  u"FAIL: Location {location} uses an unsupported scheme.\n"
                  u"{filename} will not be uploaded.").format(
                      location=original_location, filename=filename))
              continue

          try:
              submit_media(
                  mg_app=app,
                  user=user,
                  submitted_file=media_file,
                  filename=filename,
                  title=maybe_unicodeify(title),
                  description=maybe_unicodeify(description),
                  license=maybe_unicodeify(media_license),
                  metadata=json_ld_metadata,
                  tags_string=u"",
                  upload_limit=upload_limit, max_file_size=max_file_size)
              print(_(
                  u"Successfully submitted {filename}!\n"
                  u"Be sure to look at the Media Processing Panel on your "
                  u"website to be sure it\n"
                  u"uploaded successfully.").format(filename=filename))
              files_uploaded += 1
          except FileUploadLimit:
              print(_(
                  u"FAIL: This file is larger than the upload limits for this site."))
          except UserUploadLimit:
              print(_(
                  "FAIL: This file will put this user past their upload limits."))
          except UserPastUploadLimit:
              print(_("FAIL: This user is already past their upload limits."))
          finally:
              # Release the file handle / HTTP stream for this row.
              # NOTE(review): assumes submit_media does not keep using the
              # handle after it returns -- confirm against submit.lib.
              media_file.close()

      print(_(
          "{files_uploaded} out of {files_attempted} files successfully "
          "submitted").format(
              files_uploaded=files_uploaded,
              files_attempted=files_attempted))
  def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
      """Yield each CSV record in ``unicode_csv_data`` as a list of text cells.

      ``unicode_csv_data`` is an iterable of text lines.  ``dialect`` and any
      extra keyword arguments are passed straight to ``csv.reader``.
      """
      # Function-scope import: the module itself does not import sys.
      import sys

      if sys.version_info[0] >= 3:
          # Python 3's csv module reads text directly; feeding it encoded
          # bytes (as the Python 2 path below does) raises an error.
          for row in csv.reader(unicode_csv_data, dialect=dialect, **kwargs):
              yield row
          return

      # Python 2's csv.py doesn't do Unicode; encode temporarily as UTF-8:
      csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                              dialect=dialect, **kwargs)
      for row in csv_reader:
          # decode UTF-8 back to Unicode, cell by cell:
          yield [six.text_type(cell, 'utf-8') for cell in row]


  def utf_8_encoder(unicode_csv_data):
      """Yield every line of ``unicode_csv_data`` encoded as UTF-8 bytes."""
      for line in unicode_csv_data:
          yield line.encode('utf-8')


  def parse_csv_file(file_contents):
      """
      The helper function which converts the csv file into a dictionary where
      each item's key is the provided value 'id' and each item's value is
      another dictionary mapping column name to cell value.

      The first line of ``file_contents`` is the header row.  Lines that are
      empty or whitespace-only are skipped.  A row with no (or an empty)
      'id' cell is keyed by its zero-based position among the lines after
      the header.  Cells beyond the last header column are ignored.

      Records are split on '\\n', so a quoted cell containing a line break
      is not supported.
      """
      list_of_contents = file_contents.split('\n')

      # Parse the header with the same CSV reader as the data rows, so a
      # trailing '\r' (CRLF files) or quoted column names are handled the
      # same way for both.  An empty header line yields no columns.
      key = next(unicode_csv_reader([list_of_contents[0]]), [])
      lines = list_of_contents[1:]

      objects_dict = {}

      # Build a dictionary
      for index, line in enumerate(lines):
          if line.isspace() or line == u'':
              continue
          # next() works on Python 2 and 3; the .next() method is 2-only.
          values = next(unicode_csv_reader([line]), [])
          # zip() stops at the shorter sequence, so a row with more cells
          # than there are header columns no longer raises IndexError.
          line_dict = dict(zip(key, values))
          media_id = line_dict.get('id') or index
          objects_dict[media_id] = line_dict

      return objects_dict