extract-background.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. #!/usr/bin/env python3
  2. # pdf2htmlEX embeds images inline in the HTML as <img src="data:" />
  3. # This script:
  4. # - removes the src="" attribute
  5. # - creates a new bg-N class for the img tag
  6. # - copies the images into the CSS
  7. import bleach
  8. import glob
  9. import os
  10. import re
  11. import sys
  12. from pyquery import PyQuery as pq
  13. # Must be given a HTML file (output of pdf2htmlEX)
  14. if len (sys.argv) == 1:
  15. exit ()
  16. # Which item to process
  17. html_file = sys.argv[1]
  18. # The CSS file of the document
  19. css_file = html_file[:-5] + '.images.css'
  20. if not os.path.isfile (html_file):
  21. print ("Input file doesn't exist in the library.")
  22. exit ()
  23. # Store all the base64 images here
  24. images = {}
  25. # Remove base64 image from <img src="">
  26. def background_to_css (index, element):
  27. new_css_class = 'bg-' + str (index)
  28. images['.' + new_css_class] = element.attr.src
  29. element.remove_attr ('alt')
  30. element.remove_attr ('src')
  31. element.add_class (new_css_class)
  32. # Read HTML file
  33. with open (html_file, 'rt', encoding='utf-8') as f:
  34. dom = pq (f.read ())
  35. # Open sidebar
  36. if len (dom ('#outline > ul > li')) > 0:
  37. dom ('#sidebar').addClass ('opened')
  38. # Loop all images
  39. dom ('#page-container img.bi').each (lambda index, item: background_to_css (index, dom (item)))
  40. # Overwrite HTML file with removed images
  41. with open (html_file, 'wt', encoding='utf-8') as f:
  42. # f.write (dom ('#sidebar').outer_html ())
  43. f.write (dom ('#page-container').outer_html ())
  44. # Append new CSS classes (with the images) to the document CSS file
  45. with open (css_file, 'at', encoding='utf-8') as f:
  46. for image, base64 in images.items ():
  47. f.write (image + '{')
  48. f.write ('background-image: url(' + base64 + ');')
  49. f.write ('background-position: center;')
  50. f.write ('background-repeat: no-repeat;')
  51. f.write ('background-size: cover;')
  52. f.write ('}')