filescan 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. #! /usr/bin/env python
  2. # -*- coding: latin-1 -*-
  3. # SPDX-License-Identifier: BSD-2-Clause
  4. # Copyright 2022 the gpsd project contributors
  5. """Scans files for tabs, non-ASCII runes, and trailing whitespace on lines.
  6. .. moduleauthor:: James Browning <JamesB.fe80@GMail.com>
  7. for a file containing a line:
  8. edited. Symbols “$”, “*” “,” and “.” are not allowed to be used
  9. we get the image of:
  10. === devtools/filescan ===
  11. 10: edited. Symbols "$", "*" "," and "." are not allowed to be used
  12. EAT " !!! !!! !!! !!! !!! !!! !!! !!! 44464
  13. =================================================================================================
  14. This file has some issues deliberately left in, as indicated by the result block. The result block shown is sanitized to avoid hall-of-mirrors effects.
  15. #. The name of this file, as given to itself, with short horizontal rules before and after.
  16. #. A non-compliant line number, followed by a colon, some spacing, and the text of the line.
  17. #. An offense summary string for the previous line, and offense pointers
  18. # There are three recognized offenses at this point,
  19. #. **E** - Trailing whitespace
  20. #. **A** - Inclusion of non-ASCII octets
  21. #. **T** - Inclusion of tabs (horizontal )
  22. # There are 4 offense location indicators, that can be below the offending portion of the line.
  23. #. **!** indicates the offset of a non-ASCII octet.
  24. #. **4** and **6** indicate the offset of trailing whitespace.
  25. #. **"** and **6** indicate the offset of tabs.
  26. # The reason the offense location indicators seem misplaced is that tabs tend to expand to the following multiple of 8, and non-ASCII octets tend to contract to fewer runes.
  27. #. A longer horizontal rule to separate any additional files.
  28. """
  29. import sys
  30. encoding = "latin-1"
  31. pstr = str
  32. fopt = "rUb"
  33. if not str is bytes:
  34. import io
  35. fopt = "rb"
  36. def make_std_wrapper(h):
  37. return io.TextIOWrapper(
  38. h.buffer, encoding=encoding, newline="\n", line_buffering=True
  39. )
  40. def pstr(s):
  41. if not isinstance(s, (bytes, bytearray)):
  42. return str(s)
  43. return str(s, encoding=encoding)
  44. sys.stdout = make_std_wrapper(sys.stdout)
  45. sys.stderr = make_std_wrapper(sys.stderr)
  46. def pchr(c):
  47. if isinstance(c, int):
  48. return chr(c)
  49. return c
  50. def pord(c):
  51. if isinstance(c, str):
  52. return ord(c)
  53. return c
  54. def mangled(strung, mods):
  55. """Return byte-array with incremented/decremented values according to a template.
  56. #. strung:: the input byte-array
  57. #. mods:: a nested tuple-like with the mods
  58. #. top level is the group of mods
  59. #. level below that is the increment/decrement value and the list of elements to modify by index
  60. """
  61. try:
  62. result = strung
  63. for chain in mods:
  64. for point in chain[1]:
  65. result[point] += chain[0]
  66. # result[point] = pchr(int( + mod))
  67. except Exception as e:
  68. _ = locals()
  69. dicky = {}
  70. for key in "e mods strung result chain point".split():
  71. if key in _:
  72. dicky[key] = _[key]
  73. print(repr(dicky))
  74. return result
  75. def hunt_in_file(fname):
  76. """Try to open and scan a file for violations given a relative file path.
  77. #. fname:: open()able argument to find the file, a filename.
  78. the first part of the code dumps violating lines into the dumper[]
  79. . Open the file then for each line.
  80. . Sets up the empty scratch variable
  81. . Look for things on the list and stick them in the scratch variable.
  82. . Possibly tack the scratch variable onto dump.
  83. the display code checks if the dumper variable isn't empty, then.
  84. . Print the banner with the filename and short horizontal rules.
  85. . Set up variables for printing.
  86. . Slap together summary line
  87. . Create storage for the offending line.
  88. . Create/populate modification variable for mangled to modify the storage.
  89. . Print out the five-digit line number, ': ', and the given line.
  90. . Prints out a line summarizing and indicating the column of the offense
  91. . Prints out a long horizontal rule to separate anything that comes next
  92. """
  93. dumper = []
  94. long_line = 0
  95. for lnum, line in enumerate(open(fname, fopt), start=1):
  96. a_line = pstr(line)
  97. scratch = [
  98. lnum,
  99. [],
  100. a_line,
  101. [len(a_line.rstrip()), len(a_line.rstrip("\n\r"))],
  102. [],
  103. ]
  104. for col, octet in enumerate(a_line, start=1):
  105. if pord(octet) > 0x7E:
  106. scratch[1].append(col)
  107. elif pord(octet) == pord("\t"):
  108. scratch[4].append(col)
  109. if scratch[1] != [] or scratch[4] != [] or scratch[3][1] != scratch[3][0]:
  110. dumper.append(scratch)
  111. long_line = max(scratch[3][1], long_line)
  112. if dumper != []:
  113. # print(repr(dumper))
  114. sys.stdout.write("=== %s ===\n" % fname)
  115. for line in dumper:
  116. col, vec = 0, ""
  117. strung = bytearray(" " * line[3][0], encoding=encoding)
  118. strung += bytearray("4" * (1 + line[3][1] - line[3][0]), encoding=encoding)
  119. vec += "E" * (line[3][1] > line[3][0])
  120. vec += "A" * (line[1] != [])
  121. vec += "T" * (line[4] != [])
  122. strung = mangled(strung, [[2, line[4]], [1, line[1]]])
  123. sys.stdout.write("%5d: %s" % (line[0], line[2]))
  124. sys.stdout.write("%-8s%s\n" % (vec, pstr(strung)))
  125. sys.stdout.write("%s\n\n" % ("=" * (long_line + 8)))
  126. if __name__ == "__main__":
  127. """Scans files for tabs, non-ASCII runes, and trailing whitespace on lines.
  128. . Unless given the --version -V --help or -h command-line arguments
  129. . Reads filenames from the command-line arguments and then from stdin if it is not a TTY, (probably the bottom of a pipe).
  130. . If given -V or --version, prints out the wrong version number.
  131. . If given -h or --help, prints text that doesn't help.
  132. . If none of that happened drop back to the command-line silently.
  133. """
  134. import argparse
  135. parser = argparse.ArgumentParser(
  136. prog="filescan",
  137. description="Scans files for tabs, non-ASCII runes and trailing whitepsace on lines.",
  138. )
  139. parser.add_argument(
  140. "--stdin",
  141. "-S",
  142. action="store_true",
  143. default=False,
  144. help="Grab list of files from pipe",
  145. )
  146. parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.3")
  147. parser.add_argument("file_list", nargs="*", help="List of files to scan")
  148. _ = parser.parse_args()
  149. rings = [_.file_list] if "file_list" in _ else []
  150. _ = sys.stdin
  151. rings += [] if _.isatty() else [_]
  152. if rings == []:
  153. parser.parse_args(["--help"])
  154. for ring in rings:
  155. for key in ring:
  156. hunt_in_file(key.strip())