# fetchpull-plot.gnuplot
# Plots the fetchpull statistics read from fetchpull-stats-get.csv and
# fetchpull-stats-put.csv into a set of PNG files.
  # Input files are ';'-separated; the x axis is a calendar date (YYYY-MM-DD).
  set datafile separator ";"
  set timefmt "%Y-%m-%d"
  set xdata time
  set format x "%Y-%m-%d"

  # Log-scaled seconds on y, so that realtime and bulk durations fit into
  # the same picture.
  set logscale y

  # Axis styling.
  set xlabel "retrieval date"
  set ylabel "time to retrieve (seconds)"
  set xtics rotate by 45 right
  # Colour-box ticks for the age axis.
  # The plot commands colour every point by log(column(4)+1)/log(2), i.e.
  # log2(age + 1).  The tick labelled with an age of a days therefore has to
  # sit at log2(a + 1), not at log2(a) + 1, which is where an age of 2a - 1
  # ends up and would make the labels understate the age by about half.
  cb_age(a) = log(a + 1) / log(2)
  # Suppress the automatic tick labels; only the explicit ones below are shown.
  set cbtics format ""
  set cbtics add ("0" cb_age(0), "1" cb_age(1), "2" cb_age(2), "4" cb_age(4), \
                  "8" cb_age(8), "16" cb_age(16), "32" cb_age(32), "64" cb_age(64), \
                  "128" cb_age(128), "256" cb_age(256), "512" cb_age(512))
  set cblabel "age / days since upload"
  # Bee-swarm jitter: makes the data density visible without needing
  # transparency.  "overlap" is the vertical binning, "spread" the horizontal
  # jitter, and "wrap" limits the horizontal spread.
  set jitter overlap 7 spread 0.02 wrap 1

  # Running means kept in ring buffers that are filled while a file is read,
  # see https://gnuplot.sourceforge.net/demo/running_avg.html and
  # https://stackoverflow.com/a/55935923/7666
  # $0 is the record number of the row currently being read; it selects the
  # ring-buffer slot and the number of slots that are averaged.

  # Long window: mean over up to n points.
  n = 64
  array A[n]
  samples(row) = ($0 <= (n - 1)) ? int($0 + 1) : n
  mod(row) = int(row) % n
  avg_n(v) = (A[mod($0) + 1] = v, (sum [k = 1:samples($0)] A[k]) / samples($0))

  # Short window: mean over up to m points.
  m = 16
  array B[m]
  samples_m(row) = ($0 <= (m - 1)) ? int($0 + 1) : m
  mod_m(row) = int(row) % m
  avg_m(v) = (B[mod_m($0) + 1] = v, (sum [k = 1:samples_m($0)] B[k]) / samples_m($0))
  # Successful requests, realtime mode.
  # First entry: scatter of column 3 over the date in column 1, coloured by
  # log2(column 4 + 1).  The constants draw labelled horizontal reference
  # lines.  The last three entries are 64-point running means of column 3 for
  # the rows grep selects with ;0;, ;1; and ;32; in front of the mode.
  set title "fetchpull: requests"
  set terminal png size 800,600
  set output "fetchpull-get-realtime.png"
  set yrange [1:]
  plot "<(grep realtime\\;\\#t fetchpull-stats-get.csv)" \
         using 1:3:(log(column(4) + 1)/log(2)) palette linewidth 1 title "realtime succeeded", \
       3 title " 3 s" at end linewidth 2 linecolor "#cccccc", \
       10 title "10 s" at end linewidth 2 linecolor "#aaaaaa", \
       60 title "1 min" at end linewidth 2 linecolor "#aaaaaa", \
       300 title "5 min" at end linewidth 2 linecolor "#cccccc", \
       "<(grep \\;0\\;realtime\\;\\#t fetchpull-stats-get.csv)" \
         using 1:(avg_n($3)) title "running mean over previous 64 age 0 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "gray", \
       "<(grep \\;1\\;realtime\\;\\#t fetchpull-stats-get.csv)" \
         using 1:(avg_n($3)) title "running mean over previous 64 age 1 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "dark-cyan", \
       "<(grep \\;32\\;realtime\\;\\#t fetchpull-stats-get.csv)" \
         using 1:(avg_n($3)) title "running mean over previous 64 age 32 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "orange"
  replot
  # Successful requests, small mode: same layout as the realtime plot, with
  # the upper reference line at 10 minutes.
  set output "fetchpull-get-small.png"
  plot "<(grep small\\;\\#t fetchpull-stats-get.csv)" \
         using 1:3:(log(column(4) + 1)/log(2)) palette linewidth 1 title "small succeeded", \
       3 title " 3 s" at end linewidth 2 linecolor "#cccccc", \
       10 title "10 s" at end linewidth 2 linecolor "#aaaaaa", \
       60 title "1 min" at end linewidth 2 linecolor "#aaaaaa", \
       600 title "10 min" at end linewidth 2 linecolor "#cccccc", \
       "<(grep \\;0\\;small\\;\\#t fetchpull-stats-get.csv)" \
         using 1:(avg_n($3)) title "running mean over previous 64 age 0 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "gray", \
       "<(grep \\;1\\;small\\;\\#t fetchpull-stats-get.csv)" \
         using 1:(avg_n($3)) title "running mean over previous 64 age 1 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "dark-cyan", \
       "<(grep \\;32\\;small\\;\\#t fetchpull-stats-get.csv)" \
         using 1:(avg_n($3)) title "running mean over previous 64 age 32 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "orange"
  replot
  # Successful requests, bulk mode: same layout again, with reference lines
  # up to 60 minutes.
  set output "fetchpull-get-bulk.png"
  plot "<(grep \\;bulk\\;\\#t fetchpull-stats-get.csv)" \
         using 1:3:(log(column(4) + 1)/log(2)) palette linewidth 1 title "bulk succeeded", \
       3 title " 3 s" at end linewidth 2 linecolor "#cccccc", \
       10 title "10 s" at end linewidth 2 linecolor "#aaaaaa", \
       60 title "1 min" at end linewidth 2 linecolor "#aaaaaa", \
       600 title "10 min" at end linewidth 2 linecolor "#cccccc", \
       3600 title "60 min" at end linewidth 2 linecolor "#dddddd", \
       "<(grep \\;0\\;bulk\\;\\#t fetchpull-stats-get.csv)" \
         using 1:(avg_n($3)) title "running mean over previous 64 age 0 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "gray", \
       "<(grep \\;1\\;bulk\\;\\#t fetchpull-stats-get.csv)" \
         using 1:(avg_n($3)) title "running mean over previous 64 age 1 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "dark-cyan", \
       "<(grep \\;32\\;bulk\\;\\#t fetchpull-stats-get.csv)" \
         using 1:(avg_n($3)) title "running mean over previous 64 age 32 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "orange"
  replot
  # Failed requests use their own jitter settings and an automatic y range.
  set jitter overlap 0.5 spread 0.1 wrap 1
  unset yrange
  set title "fetchpull: failed requests"
  set terminal png size 800,600

  # Failed realtime requests.  Rows whose column 3 is 20000 or more are left
  # out of the scatter (1/0 marks the point as undefined); the running mean
  # in the last entry averages column 3 over up to 16 points.
  set output "fetchpull-get-failed-realtime.png"
  plot "<(grep realtime\\;\\#f fetchpull-stats-get.csv)" \
         using 1:(column(3) < 20000 ? column(3) : 1/0):(log(column(4) + 1)/log(2)) \
         palette linewidth 1 pointtype 4 title "realtime failed", \
       3 title " 3 s" at end linewidth 2 linecolor "#cccccc", \
       10 title "10 s" at end linewidth 2 linecolor "#aaaaaa", \
       60 title "1 min" at end linewidth 2 linecolor "#aaaaaa", \
       300 title "5 min" at end linewidth 2 linecolor "#cccccc", \
       "<(grep \\;1\\;realtime\\;\\#f fetchpull-stats-get.csv)" \
         using 1:(avg_m($3)) title "running mean over previous 16 age 1 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "dark-cyan"
  replot
  # Failed small requests: scatter without rows whose column 3 is 20000 or
  # more, plus a running mean of column 3 over up to 16 points.
  set output "fetchpull-get-failed-small.png"
  plot "<(grep small\\;\\#f fetchpull-stats-get.csv)" \
         using 1:(column(3) < 20000 ? column(3) : 1/0):(log(column(4) + 1)/log(2)) \
         palette linewidth 1 title "small failed", \
       3 title " 3 s" at end linewidth 2 linecolor "#cccccc", \
       10 title "10 s" at end linewidth 2 linecolor "#aaaaaa", \
       60 title "1 min" at end linewidth 2 linecolor "#aaaaaa", \
       600 title "10 min" at end linewidth 2 linecolor "#cccccc", \
       "<(grep \\;1\\;small\\;\\#f fetchpull-stats-get.csv)" \
         using 1:(avg_m($3)) title "running mean over previous 16 age 1 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "dark-cyan"
  replot
  # Failed bulk requests: scatter without rows whose column 3 is 20000 or
  # more, plus a running mean of column 3 over up to 16 points.
  set output "fetchpull-get-failed-bulk.png"
  plot "<(grep bulk\\;\\#f fetchpull-stats-get.csv)" \
         using 1:(column(3) < 20000 ? column(3) : 1/0):(log(column(4) + 1)/log(2)) \
         palette linewidth 1 title "bulk failed", \
       3 title " 3 s" at end linewidth 2 linecolor "#cccccc", \
       10 title "10 s" at end linewidth 2 linecolor "#aaaaaa", \
       60 title "1 min" at end linewidth 2 linecolor "#aaaaaa", \
       600 title "10 min" at end linewidth 2 linecolor "#cccccc", \
       "<(grep \\;1\\;bulk\\;\\#f fetchpull-stats-get.csv)" \
         using 1:(avg_m($3)) title "running mean over previous 16 age 1 points" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "dark-cyan"
  replot
  # Successful inserts: all three modes share one picture.  Scatter entries
  # come first, then the reference lines, then one 64-point running mean per
  # mode; only the first mean carries a legend entry.
  set title "fetchpull: inserts"
  set xlabel "upload date"
  set ylabel "time to upload (seconds)"
  set cblabel "days until download"
  set output "fetchpull-put.png"
  plot "<(grep realtime\\;\\#t fetchpull-stats-put.csv)" \
         using 1:3:(log(column(4) + 1)/log(2)) palette linewidth 1 title "realtime succeeded", \
       "<(grep small\\;\\#t fetchpull-stats-put.csv)" \
         using 1:3:(log(column(4) + 1)/log(2)) palette linewidth 1 title "small succeeded", \
       "<(grep bulk\\;\\#t fetchpull-stats-put.csv)" \
         using 1:3:(log(column(4) + 1)/log(2)) palette linewidth 1 title "bulk succeeded", \
       3 title " 3 s" at end linewidth 2 linecolor "#cccccc", \
       10 title "10 s" at end linewidth 2 linecolor "#aaaaaa", \
       60 title "1 min" at end linewidth 2 linecolor "#aaaaaa", \
       600 title "10 min" at end linewidth 2 linecolor "#cccccc", \
       3600 title "60 min" at end linewidth 2 linecolor "#dddddd", \
       "<(grep \\;1\\;realtime\\;\\#t fetchpull-stats-put.csv)" \
         using 1:(avg_n($3)) title "running means, last 64, in 1 day" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "dark-cyan", \
       "<(grep \\;1\\;small\\;\\#t fetchpull-stats-put.csv)" \
         using 1:(avg_n($3)) title "" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "dark-cyan", \
       "<(grep \\;1\\;bulk\\;\\#t fetchpull-stats-put.csv)" \
         using 1:(avg_n($3)) title "" \
         pointtype 7 pointsize 0.5 linewidth 2 linecolor rgb "dark-cyan"
  replot
  # Failed inserts: all three modes in one picture.  Rows whose column 3 is
  # 20000 or more are left out; no running means are drawn here.
  set title "fetchpull: failed inserts"
  set xlabel "upload date"
  set ylabel "time to upload (seconds)"
  set cblabel "days until download"
  set output "fetchpull-put-failed.png"
  plot "<(grep realtime\\;\\#f fetchpull-stats-put.csv)" \
         using 1:(column(3) < 20000 ? column(3) : 1/0):(log(column(4) + 1)/log(2)) \
         palette linewidth 1 pointtype 4 title "realtime failed", \
       "<(grep small\\;\\#f fetchpull-stats-put.csv)" \
         using 1:(column(3) < 20000 ? column(3) : 1/0):(log(column(4) + 1)/log(2)) \
         palette linewidth 1 title "small failed", \
       "<(grep bulk\\;\\#f fetchpull-stats-put.csv)" \
         using 1:(column(3) < 20000 ? column(3) : 1/0):(log(column(4) + 1)/log(2)) \
         palette linewidth 1 title "bulk failed", \
       3 title " 3 s" at end linewidth 2 linecolor "#cccccc", \
       10 title "10 s" at end linewidth 2 linecolor "#aaaaaa", \
       60 title "1 min" at end linewidth 2 linecolor "#aaaaaa", \
       600 title "10 min" at end linewidth 2 linecolor "#cccccc", \
       3600 title "60 min" at end linewidth 2 linecolor "#dddddd"
  replot
  # Lifetime plots: column 4 (age) on a log-scaled y axis over the date,
  # coloured by column 3 (download time) on a log-scaled colour box.
  unset cbtics
  set yrange [1:1000]
  set logscale y
  set logscale cb
  # Only the explicit power-of-two labels below are shown on the colour box.
  set cbtics format ""
  set cbtics add ("0" 0, "1" 1, "2" 2, "4" 4, "8" 8, "16" 16, "32" 32, "64" 64, \
                  "128" 128, "256" 256, "512" 512, "1024" 1024, "2048" 2048, \
                  "4096" 4096)
  set ylabel "age / days since upload"
  set cblabel "time to download (s)"
  set title "fetchpull: lifetime download-time"
  set terminal png size 800,600

  set output "fetchpull-lifetime-realtime.png"
  plot "<(grep realtime\\;\\#t fetchpull-stats-get.csv)" \
         using 1:4:3 palette linewidth 1 title "realtime succeeded"

  set output "fetchpull-lifetime-small.png"
  plot "<(grep small\\;\\#t fetchpull-stats-get.csv)" \
         using 1:4:3 palette linewidth 1 title "small succeeded"

  set output "fetchpull-lifetime-bulk.png"
  plot "<(grep bulk\\;\\#t fetchpull-stats-get.csv)" \
         using 1:4:3 palette linewidth 1 title "bulk succeeded"
  # download time heatmaps (disabled: they do not work well yet)
  # set view map
  # set dgrid3d
  # set pm3d interpolate 16,16
  # set title "fetchpull: average download time"
  #
  # set output "fetchpull-lifetime-bulk-download-time.png"
  # splot "<(grep 'bulk;#t' fetchpull-stats-get.csv)" using 1:4:3 palette with pm3d title "bulk succeeded"
  # set output "fetchpull-lifetime-small-download-time.png"
  # splot "<(grep 'small;#t' fetchpull-stats-get.csv)" using 1:4:3 palette with pm3d title "small succeeded"
  # Monthly success counts.
  # The shell pipeline cuts the day off the date (s/-..;/;/), drops two
  # ';'-fields, turns the remaining ';' into blanks, and lets "sort | uniq -c"
  # count identical lines.  gnuplot then reads whitespace-separated columns:
  # the count (column 1, colour), the month (column 2, x), and the next field
  # (column 3, y).
  # NOTE(review): without -E, sed reads the "+" in s/;[^;]+// as a literal plus
  # sign, so that expression presumably never matches these rows.  Confirm
  # before "fixing" it: dropping a third field would shift the plotted columns.
  set title "fetchpull: lifetime: monthly success-count"
  unset datafile separator
  unset logscale cb
  unset cbtics
  set cbtics format "%g "
  set cblabel "successful downloads (count)"

  # Simple monthly binning: dates are reduced to year-month.
  set timefmt "%Y-%m"
  set format x "%Y-%m"

  set output "fetchpull-lifetime-realtime-success-count.png"
  plot "<(grep 'realtime;#t' fetchpull-stats-get.csv | sed 's/-..;/;/;s/;[^;]+//;s/;[^;]*//;s/;[^;]*//;s/;/ /g' | sort | uniq -c)" \
         using 2:3:1 palette linewidth 4 title "realtime succeeded", \
       14 title "2 weeks" at begin linewidth 2 linecolor "#aaaaaa", \
       90 title "3 mo." at begin linewidth 2 linecolor "#cccccc"

  set output "fetchpull-lifetime-small-success-count.png"
  plot "<(grep 'small;#t' fetchpull-stats-get.csv | sed 's/-..;/;/;s/;[^;]+//;s/;[^;]*//;s/;[^;]*//;s/;/ /g' | sort | uniq -c)" \
         using 2:3:1 palette linewidth 4 title "small succeeded", \
       14 title "2 weeks" at begin linewidth 2 linecolor "#aaaaaa", \
       90 title "3 mo." at begin linewidth 2 linecolor "#cccccc"

  set output "fetchpull-lifetime-bulk-success-count.png"
  plot "<(grep 'bulk;#t' fetchpull-stats-get.csv | sed 's/-..;/;/;s/;[^;]+//;s/;[^;]*//;s/;[^;]*//;s/;/ /g' | sort | uniq -c)" \
         using 2:3:1 palette linewidth 4 title "bulk succeeded", \
       14 title "2 weeks" at begin linewidth 2 linecolor "#aaaaaa", \
       90 title "3 mo." at begin linewidth 2 linecolor "#cccccc"
  replot
  quit