mediacleaner.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. #!/usr/bin/env python3
  2. import os
  3. import sys
  4. import datetime
  5. import json
  6. import subprocess
  7. import ff
  8. def analyze_file(p):
  9. basename = os.path.basename(p)
  10. stem, ext = os.path.splitext(basename)
  11. out = {
  12. 'path': p,
  13. 'basename': basename,
  14. }
  15. # Get the actual file size.
  16. stat = os.stat(p)
  17. size = stat.st_size
  18. out['size'] = size
  19. # If empty then ffprobe will fail anyways, so we should pick this up early.
  20. if size == 0:
  21. out['title'] = basename
  22. out['error'] = 'no data'
  23. return out
  24. # Pick out the video stream data.
  25. probe = None
  26. try:
  27. probe = ff.probe_file(p)
  28. except Exception as e:
  29. out['title'] = basename
  30. out['error'] = str(e)
  31. return p
  32. props = ff.extract_stream_props(probe)
  33. if props['title'] is None:
  34. props['title'] = basename
  35. return {**out, **props}
  36. def display_analysis(a, pfx=''):
  37. if 'error' in a:
  38. print(pfx + a['path'])
  39. sz = a['size'] // 1024 // 1024
  40. print(pfx + 'sz', 'MiB')
  41. print(pfx + 'err', a['error'])
  42. return
  43. if a['title'] == a['basename']:
  44. print(pfx + a['basename'])
  45. else:
  46. print(pfx + a['basename'], '(', a['title'], ')')
  47. print(pfx + '* res', '%sx%s' % (a['res_w'], + a['res_h']), 'in', a['codec'])
  48. td = datetime.timedelta(seconds=a['duration_sec'])
  49. kbps = a['bitrate_bps'] // 1000
  50. sz = a['size'] // 1024 // 1024
  51. print(pfx + '* dur', td, 'at', kbps, 'kb/s', '(', sz, 'MiB )')
  52. MEDIA_EXT = ['.mkv', '.mp4', '.ogv', '.avi', '.wmv']
  53. def analyze_dir(dp):
  54. anas = []
  55. for f in os.listdir(dp):
  56. p = os.path.join(dp, f)
  57. if os.path.isdir(p):
  58. continue
  59. _, ext = os.path.splitext(f)
  60. if ext not in MEDIA_EXT:
  61. continue
  62. ana = analyze_file(p)
  63. anas.append(ana)
  64. return anas
  65. def main_analyze(argv):
  66. rp = os.path.expanduser(argv[0])
  67. ana = analyze_file(rp)
  68. display_analysis(ana, pfx='\t')
  69. return 0
  70. def main_comparedir(argv):
  71. dirpath = os.path.expanduser(argv[0])
  72. anas = analyze_dir(dirpath)
  73. for a in anas:
  74. display_analysis(a)
  75. def main_finddups(argv):
  76. mpath = os.path.expanduser(argv[0])
  77. dups = []
  78. weirds = []
  79. n_ok = 0
  80. for f in sorted(os.listdir(mpath)):
  81. p = os.path.join(mpath, f)
  82. if not os.path.isdir(p):
  83. continue
  84. try:
  85. anas = analyze_dir(p)
  86. except Exception as e:
  87. print('error: skipping dir with problem:', f)
  88. weirds.append((f, e))
  89. continue
  90. if len(anas) == 0:
  91. print('warn: skipped dir with no media:', f)
  92. n_ok += 1
  93. elif len(anas) == 1:
  94. print('ok: single media:', f)
  95. else:
  96. print('match: found', len(anas), 'in', f)
  97. dups.append((f, anas))
  98. print('OK:', n_ok)
  99. print('Mulitples:')
  100. for (f, dups) in dups:
  101. print('name:', f)
  102. for a in dups:
  103. display_analysis(a, pfx='\t')
  104. print('Errors:')
  105. for (f, e) in weirds:
  106. print('name:', f, str(e))
  107. def print_usage():
  108. print('usage: ./mediacleaner.py [analyze|a] <filepath>')
  109. print('usage: ./mediacleaner.py [comparedir|c] <dirpath>')
  110. print('usage: ./mediacleaner.py [finddups|d] <mediapath>')
  111. def main(argv):
  112. if len(argv) == 1:
  113. print_usage()
  114. return 1
  115. subc = argv[1]
  116. if subc == 'analyze' or subc == 'a':
  117. return main_analyze(argv[2:])
  118. if subc == 'comparedir' or subc == 'c':
  119. return main_comparedir(argv[2:])
  120. if subc == 'finddups' or subc == 'd':
  121. return main_finddups(argv[2:])
  122. print('unknown subcommand:', subc)
  123. print_usage()
  124. return 2
  125. if __name__ == '__main__':
  126. sys.exit(main(sys.argv))