media.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. import base64
  2. import mimetypes
  3. from enum import Enum
  4. from gzip import GzipFile
  5. from io import BytesIO
  6. from typing import Any
  7. import gridfs
  8. import piexif
  9. import requests
  10. from PIL import Image
  11. def load(url, user_agent):
  12. """Initializes a `PIL.Image` from the URL."""
  13. with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
  14. resp.raise_for_status()
  15. if not resp.headers.get("content-type").startswith("image/"):
  16. raise ValueError(f"bad content-type {resp.headers.get('content-type')}")
  17. resp.raw.decode_content = True
  18. return Image.open(BytesIO(resp.raw.read()))
  19. def to_data_uri(img):
  20. out = BytesIO()
  21. img.save(out, format=img.format)
  22. out.seek(0)
  23. data = base64.b64encode(out.read()).decode("utf-8")
  24. return f"data:{img.get_format_mimetype()};base64,{data}"
  25. class Kind(Enum):
  26. ATTACHMENT = "attachment"
  27. ACTOR_ICON = "actor_icon"
  28. UPLOAD = "upload"
  29. OG_IMAGE = "og"
  30. class MediaCache(object):
  31. def __init__(self, gridfs_db: str, user_agent: str) -> None:
  32. self.fs = gridfs.GridFS(gridfs_db)
  33. self.user_agent = user_agent
  34. def cache_og_image(self, url: str) -> None:
  35. if self.fs.find_one({"url": url, "kind": Kind.OG_IMAGE.value}):
  36. return
  37. i = load(url, self.user_agent)
  38. # Save the original attachment (gzipped)
  39. i.thumbnail((100, 100))
  40. with BytesIO() as buf:
  41. with GzipFile(mode="wb", fileobj=buf) as f1:
  42. i.save(f1, format=i.format)
  43. buf.seek(0)
  44. self.fs.put(
  45. buf,
  46. url=url,
  47. size=100,
  48. content_type=i.get_format_mimetype(),
  49. kind=Kind.OG_IMAGE.value,
  50. )
  51. def cache_og_image2(self, url: str, remote_id: str) -> None:
  52. if self.fs.find_one({"url": url, "kind": Kind.OG_IMAGE.value}):
  53. return
  54. i = load(url, self.user_agent)
  55. # Save the original attachment (gzipped)
  56. i.thumbnail((100, 100))
  57. with BytesIO() as buf:
  58. with GzipFile(mode="wb", fileobj=buf) as f1:
  59. i.save(f1, format=i.format)
  60. buf.seek(0)
  61. self.fs.put(
  62. buf,
  63. url=url,
  64. size=100,
  65. content_type=i.get_format_mimetype(),
  66. kind=Kind.OG_IMAGE.value,
  67. remote_id=remote_id,
  68. )
  69. def cache_attachment(self, url: str) -> None:
  70. if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
  71. return
  72. if (
  73. url.endswith(".png")
  74. or url.endswith(".jpg")
  75. or url.endswith(".jpeg")
  76. or url.endswith(".gif")
  77. ):
  78. i = load(url, self.user_agent)
  79. # Save the original attachment (gzipped)
  80. with BytesIO() as buf:
  81. f1 = GzipFile(mode="wb", fileobj=buf)
  82. i.save(f1, format=i.format)
  83. f1.close()
  84. buf.seek(0)
  85. self.fs.put(
  86. buf,
  87. url=url,
  88. size=None,
  89. content_type=i.get_format_mimetype(),
  90. kind=Kind.ATTACHMENT.value,
  91. )
  92. # Save a thumbnail (gzipped)
  93. i.thumbnail((720, 720))
  94. with BytesIO() as buf:
  95. with GzipFile(mode="wb", fileobj=buf) as f1:
  96. i.save(f1, format=i.format)
  97. buf.seek(0)
  98. self.fs.put(
  99. buf,
  100. url=url,
  101. size=720,
  102. content_type=i.get_format_mimetype(),
  103. kind=Kind.ATTACHMENT.value,
  104. )
  105. return
  106. def cache_attachment2(self, url: str, remote_id: str) -> None:
  107. if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
  108. return
  109. if (
  110. url.endswith(".png")
  111. or url.endswith(".jpg")
  112. or url.endswith(".jpeg")
  113. or url.endswith(".gif")
  114. ):
  115. i = load(url, self.user_agent)
  116. # Save the original attachment (gzipped)
  117. with BytesIO() as buf:
  118. f1 = GzipFile(mode="wb", fileobj=buf)
  119. i.save(f1, format=i.format)
  120. f1.close()
  121. buf.seek(0)
  122. self.fs.put(
  123. buf,
  124. url=url,
  125. size=None,
  126. content_type=i.get_format_mimetype(),
  127. kind=Kind.ATTACHMENT.value,
  128. remote_id=remote_id,
  129. )
  130. # Save a thumbnail (gzipped)
  131. i.thumbnail((720, 720))
  132. with BytesIO() as buf:
  133. with GzipFile(mode="wb", fileobj=buf) as f1:
  134. i.save(f1, format=i.format)
  135. buf.seek(0)
  136. self.fs.put(
  137. buf,
  138. url=url,
  139. size=720,
  140. content_type=i.get_format_mimetype(),
  141. kind=Kind.ATTACHMENT.value,
  142. remote_id=remote_id,
  143. )
  144. return
  145. # The attachment is not an image, download and save it anyway
  146. with requests.get(
  147. url, stream=True, headers={"User-Agent": self.user_agent}
  148. ) as resp:
  149. resp.raise_for_status()
  150. with BytesIO() as buf:
  151. with GzipFile(mode="wb", fileobj=buf) as f1:
  152. for chunk in resp.iter_content():
  153. if chunk:
  154. f1.write(chunk)
  155. buf.seek(0)
  156. self.fs.put(
  157. buf,
  158. url=url,
  159. size=None,
  160. content_type=mimetypes.guess_type(url)[0],
  161. kind=Kind.ATTACHMENT.value,
  162. remote_id=remote_id,
  163. )
  164. def cache_actor_icon(self, url: str) -> None:
  165. if self.fs.find_one({"url": url, "kind": Kind.ACTOR_ICON.value}):
  166. return
  167. i = load(url, self.user_agent)
  168. for size in [50, 80]:
  169. t1 = i.copy()
  170. t1.thumbnail((size, size))
  171. with BytesIO() as buf:
  172. with GzipFile(mode="wb", fileobj=buf) as f1:
  173. t1.save(f1, format=i.format)
  174. buf.seek(0)
  175. self.fs.put(
  176. buf,
  177. url=url,
  178. size=size,
  179. content_type=i.get_format_mimetype(),
  180. kind=Kind.ACTOR_ICON.value,
  181. )
  182. def save_upload(self, obuf: BytesIO, filename: str) -> str:
  183. # Remove EXIF metadata
  184. if filename.lower().endswith(".jpg") or filename.lower().endswith(".jpeg"):
  185. obuf.seek(0)
  186. with BytesIO() as buf2:
  187. piexif.remove(obuf.getvalue(), buf2)
  188. obuf.truncate(0)
  189. obuf.write(buf2.getvalue())
  190. obuf.seek(0)
  191. mtype = mimetypes.guess_type(filename)[0]
  192. with BytesIO() as gbuf:
  193. with GzipFile(mode="wb", fileobj=gbuf) as gzipfile:
  194. gzipfile.write(obuf.getvalue())
  195. gbuf.seek(0)
  196. oid = self.fs.put(
  197. gbuf,
  198. content_type=mtype,
  199. upload_filename=filename,
  200. kind=Kind.UPLOAD.value,
  201. )
  202. return str(oid)
  203. def cache(self, url: str, kind: Kind) -> None:
  204. if kind == Kind.ACTOR_ICON:
  205. self.cache_actor_icon(url)
  206. elif kind == Kind.OG_IMAGE:
  207. self.cache_og_image(url)
  208. else:
  209. self.cache_attachment(url)
  210. def get_actor_icon(self, url: str, size: int) -> Any:
  211. return self.get_file(url, size, Kind.ACTOR_ICON)
  212. def get_attachment(self, url: str, size: int) -> Any:
  213. return self.get_file(url, size, Kind.ATTACHMENT)
  214. def get_file(self, url: str, size: int, kind: Kind) -> Any:
  215. return self.fs.find_one({"url": url, "size": size, "kind": kind.value})