# kamitkami.py
'''
KamitKami gives a smaller css to be included with html file for faster website loading times.
Copyright (C) 2021 Sagar Acharya
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
'''
  15. import sys, os, re
  16. from bs4 import BeautifulSoup
  17. from scrapy.selector import Selector
  18. from scrapy.http import HtmlResponse
  19. # changes working directory to script directory
  20. os.chdir(os.getcwd())
  21. # get arguments from terminal to list
  22. names = sys.argv[1:]
  23. # list which keeps some attributes as they are within css file
  24. nofilter = ["*", "html", "body", ":root", "head"]
  25. # checks whether number of arguments are correct
  26. if len(names)!=2:
  27. print("Please input only 2 arguments, css filename and html filename")
  28. sys.exit()
  29. with open(names[0]+'.css') as temp:
  30. css = temp.read()
  31. with open(names[1]+'.html') as temp:
  32. html = temp.read()
  33. def divide_elem_blocks(css_string):
  34. _import = ["@import" + x + ";" for x in re.findall(r'@import(.*?);', css_string)]
  35. for i in _import:
  36. css_string = css_string.replace(i, "")
  37. selectors = []
  38. blocks = []
  39. num_curly = 0
  40. start = False
  41. record = ["", ""]
  42. for i in css_string:
  43. if start:
  44. if (i == '{'):
  45. num_curly +=1
  46. record[1] +=i
  47. elif (i == '}' and num_curly == 0):
  48. selectors.append(record[0])
  49. blocks.append(record[1])
  50. record = ["", ""]
  51. start = False
  52. elif (i == '}' and num_curly > 0):
  53. num_curly -=1
  54. else:
  55. record[1]+=i
  56. else:
  57. if (i == '{'):
  58. start = True
  59. else:
  60. record[0]+=i
  61. if (i == '{' and num_curly == 0):
  62. start = True
  63. return _import, selectors, blocks
  64. # filters C type comments
  65. css = re.sub('\/\*(\*(?!\/)|[^*])*\*\/', '', css)
  66. css = css.replace("\n", "")
  67. # selectors and blocks have same length and correspond to each other
  68. _import, selectors, blocks = divide_elem_blocks(css)
  69. #Scrapy used here to check if a particular css selector is used in html
  70. response = HtmlResponse(url = 'http://mysite.com', body = html,encoding='utf-8')
  71. print(response.css('card-container').get()==None)
  72. def check_css_in_html(response,selector):
  73. try:
  74. if response.css(selector).get()!=None:
  75. return True
  76. except:pass
  77. return False
  78. # filters innermost content between { & }
  79. #css = re.sub('\{[^{}]*\}', '', css)
  80. #css = re.sub('\{[^{}]*\}', '', css)
  81. # skip everything till next comment, take html variable as input and using python-scrapy, find relevant css here, change things at if statement at end
  82. soup = BeautifulSoup(html, 'html.parser')
  83. classes = []
  84. tags = []
  85. type_elem = []
  86. for i in soup.find_all(True):
  87. tags.append(i.name)
  88. try:
  89. for j in i.attrs['class']:
  90. classes.append('.'+j)
  91. except:
  92. pass
  93. try:
  94. type_elem.append('[type="'+i.attrs['type']+'"')
  95. except:
  96. pass
  97. #found unique list of tags, classes, types used in document (id yet to do)
  98. classes = list(set(classes))
  99. tags = [e for e in list(set(tags)) if e not in ('br', 'meta', 'div', 'link', 'html', 'body', 'head')]
  100. type_elem = list(set(type_elem))
  101. #keep all selectors:blocks of nofilter list
  102. # https://www.w3schools.com/cssref/css_selectors.asp
  103. # Find relevant css tags and according to above formatting,
  104. # filter irrelevant ones and write keys:blocks to file below.
  105. # class,class .class.class
  106. # [hidden]
  107. # :: , :
  108. # [type="class"]
  109. # :hover, active, etc.
  110. # [^=] [*=]
  111. # :not(:something)
  112. with open(names[1]+"_"+names[0]+".css", 'w') as f:
  113. for i in _import:
  114. f.write(i)
  115. for i in selectors:
  116. #we split the selectors into individual and check them in html
  117. selector = i.split(',')
  118. for sel in selector:
  119. if (check_css_in_html(response,sel)):
  120. #if even one selector is present in html then we put entry into the output css
  121. print(i+" selected")
  122. f.write(i)
  123. f.write('{')
  124. f.write(blocks[selectors.index(i)])
  125. f.write("}\n")
  126. break
  127. #minify