2 Commits eaf8ff519a ... 30621c22dd

Author SHA1 Message Date
  Arne Babenhauserheide 30621c22dd polish diagrams 8 months ago
  Arne Babenhauserheide 26d7156eb1 debug and add explanation 1 year ago
2 changed files with 69 additions and 27 deletions
  1. 37 13
      fetchpull-plot.gnuplot
  2. 32 14
      fetchpull.w

+ 37 - 13
fetchpull-plot.gnuplot

@@ -2,8 +2,6 @@ set xdata time
 set datafile separator ";"
 set timefmt "%Y-%m-%d"
 set format x "%Y-%m-%d"
-# bee swarm plot disabled, since we now have enough data
-# set jitter over 0.5 spread 0.5
 # logarithmic seconds to show both realtime and bulk together
 set logscale y
 # styling
@@ -22,29 +20,52 @@ set cbtics add ("64" 7)
 set cbtics add ("128" 8)
 set cbtics add ("256" 9)
 set cbtics add ("512" 10)
-set cblabel "days since upload"
+set cblabel "age / days since upload"
+
+# add jitter for a bee swarm plot to get a better understanding of the data density without needing transparency
+# overlap is vertical binning, spread is horizontal jitter, wrap limits horizontal spread
+set jitter overlap 7 spread 0.02 wrap 1
+
+# moving average, see https://gnuplot.sourceforge.net/demo/running_avg.html and https://stackoverflow.com/a/55935923/7666
+# number of points in average
+n = 64
+array A[n]
+samples(x) = $0 > (n - 1) ? n : int($0+1)
+mod(x) = int(x) % n
+avg_n(x) = (A[mod($0)+1]=x, (sum [i=1:samples($0)] A[i]) / samples($0))
+
 # plot requests
 set title "fetchpull: requests"
 set term png size 800,600
 set output "fetchpull-get-realtime.png"
-plot "<(grep realtime\\;\\#t fetchpull-stats-get.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "realtime succeeded", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc"
+plot "<(grep realtime\\;\\#t fetchpull-stats-get.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "realtime succeeded", 3 title " 3 s" at end  lw 2 lc "#cccccc", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 300 title "5 min" at end lw 2 lc "#cccccc", \
+   "<(grep \\;1\\;realtime\\;\\#t fetchpull-stats-get.csv)" using 1:(avg_n($3)) title "running mean over previous 64 age 1 points" pt 7 ps 0.5 lw 2 lc rgb "dark-cyan"
 replot
 set output "fetchpull-get-small.png"
-plot "<(grep small\\;\\#t fetchpull-stats-get.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "small succeeded", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc"
+plot "<(grep small\\;\\#t fetchpull-stats-get.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "small succeeded", 3 title " 3 s" at end  lw 2 lc "#cccccc", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc", \
+   "<(grep \\;1\\;small\\;\\#t fetchpull-stats-get.csv)" using 1:(avg_n($3)) title "running mean over previous 64 age 1 points" pt 7 ps 0.5 lw 2 lc rgb "dark-cyan"
 replot
 set output "fetchpull-get-bulk.png"
-plot "<(grep bulk\\;\\#t fetchpull-stats-get.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "bulk succeeded", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc", 3600 title "60 min" at end lw 2 lc "#dddddd"
+plot "<(grep \\;bulk\\;\\#t fetchpull-stats-get.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "bulk succeeded", 3 title " 3 s" at end  lw 2 lc "#cccccc", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc", 3600 title "60 min" at end lw 2 lc "#dddddd", \
+   "<(grep \\;1\\;bulk\\;\\#t fetchpull-stats-get.csv)" using 1:(avg_n($3)) title "running mean over previous 64 age 1 points" pt 7 ps 0.5 lw 2 lc rgb "dark-cyan"
 replot
+
+# failed requests: smaller overlap distance (fewer points get displaced) but wider horizontal spread
+set jitter overlap 0.5 spread 0.1 wrap 1
+
 set title "fetchpull: failed requests"
 set term png size 800,600
 set output "fetchpull-get-failed-realtime.png"
-plot "<(grep realtime\\;\\#f fetchpull-stats-get.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 pt 4 title "realtime failed", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc"
+plot "<(grep realtime\\;\\#f fetchpull-stats-get.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 pt 4 title "realtime failed", 3 title " 3 s" at end  lw 2 lc "#cccccc", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 300 title "5 min" at end lw 2 lc "#cccccc", \
+   "<(grep \\;1\\;realtime\\;\\#f fetchpull-stats-get.csv)" using 1:(avg_n($3)) title "running mean over previous 64 age 1 points" pt 7 ps 0.5 lw 2 lc rgb "dark-cyan"
 replot
 set output "fetchpull-get-failed-small.png"
-plot "<(grep small\\;\\#f fetchpull-stats-get.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 title "small failed", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc"
+plot "<(grep small\\;\\#f fetchpull-stats-get.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 title "small failed", 3 title " 3 s" at end  lw 2 lc "#cccccc", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc", \
+   "<(grep \\;1\\;small\\;\\#f fetchpull-stats-get.csv)" using 1:(avg_n($3)) title "running mean over previous 64 age 1 points" pt 7 ps 0.5 lw 2 lc rgb "dark-cyan"
 replot
 set output "fetchpull-get-failed-bulk.png"
-plot "<(grep bulk\\;\\#f fetchpull-stats-get.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 title "bulk failed", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc"
+plot "<(grep bulk\\;\\#f fetchpull-stats-get.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 title "bulk failed", 3 title " 3 s" at end  lw 2 lc "#cccccc", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc", \
+   "<(grep \\;1\\;bulk\\;\\#f fetchpull-stats-get.csv)" using 1:(avg_n($3)) title "running mean over previous 64 age 1 points" pt 7 ps 0.5 lw 2 lc rgb "dark-cyan"
 replot
 # plot inserts
 set output "fetchpull-put.png"
@@ -52,7 +73,10 @@ set title "fetchpull: inserts"
 set ylabel "time to upload (seconds)"
 set xlabel "upload date"
 set cblabel "days until download"
-plot "<(grep realtime\\;\\#t fetchpull-stats-put.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "realtime succeeded", "<(grep small\\;\\#t fetchpull-stats-put.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "small succeeded", "<(grep bulk\\;\\#t fetchpull-stats-put.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "bulk succeeded", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc", 3600 title "60 min" at end lw 2 lc "#dddddd"
+plot "<(grep realtime\\;\\#t fetchpull-stats-put.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "realtime succeeded", "<(grep small\\;\\#t fetchpull-stats-put.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "small succeeded", "<(grep bulk\\;\\#t fetchpull-stats-put.csv)" using 1:3:(log((column(4)+1))/log(2)) palette lw 1 title "bulk succeeded", 3 title " 3 s" at end  lw 2 lc "#cccccc", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc", 3600 title "60 min" at end lw 2 lc "#dddddd", \
+   "<(grep \\;1\\;realtime\\;\\#t fetchpull-stats-put.csv)" using 1:(avg_n($3)) title "running means, last 64, in 1 day" pt 7 ps 0.5 lw 2 lc rgb "dark-cyan", \
+   "<(grep \\;1\\;small\\;\\#t fetchpull-stats-put.csv)" using 1:(avg_n($3)) title "" pt 7 ps 0.5 lw 2 lc rgb "dark-cyan", \
+   "<(grep \\;1\\;bulk\\;\\#t fetchpull-stats-put.csv)" using 1:(avg_n($3)) title "" pt 7 ps 0.5 lw 2 lc rgb "dark-cyan"
 replot
 # plot inserts
 set output "fetchpull-put-failed.png"
@@ -60,11 +84,11 @@ set title "fetchpull: failed inserts"
 set ylabel "time to upload (seconds)"
 set xlabel "upload date"
 set cblabel "days until download"
-plot "<(grep realtime\\;\\#f fetchpull-stats-put.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 pt 4 title "realtime failed", "<(grep small\\;\\#f fetchpull-stats-put.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 title "small failed", "<(grep bulk\\;\\#f fetchpull-stats-put.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 title "bulk failed", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc", 3600 title "60 min" at end lw 2 lc "#dddddd"
+plot "<(grep realtime\\;\\#f fetchpull-stats-put.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 pt 4 title "realtime failed", "<(grep small\\;\\#f fetchpull-stats-put.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 title "small failed", "<(grep bulk\\;\\#f fetchpull-stats-put.csv)" using 1:(column(3)<20000? column(3) : 1/0):(log((column(4)+1))/log(2)) palette lw 1 title "bulk failed", 3 title " 3 s" at end  lw 2 lc "#cccccc", 10 title "10 s" at end  lw 2 lc "#aaaaaa", 60 title "1 min" at end  lw 2 lc "#aaaaaa", 600 title "10 min" at end lw 2 lc "#cccccc", 3600 title "60 min" at end lw 2 lc "#dddddd"
 replot
 # plot max age of requests
 unset cbtics
-set yrange [1:512]
+set yrange [1:1000]
 set logscale y
 set logscale cb
 set cbtics format ""
@@ -82,7 +106,7 @@ set cbtics add ("512" 512)
 set cbtics add ("1024" 1024)
 set cbtics add ("2048" 2048)
 set cbtics add ("4096" 4096)
-set ylabel "days since upload"
+set ylabel "age / days since upload"
 set cblabel "time to download (s)"
 set title "fetchpull: lifetime download-time"
 set term png size 800,600

+ 32 - 14
fetchpull.w

@@ -775,7 +775,7 @@ KSK@...;40;32;realtime;true
   when target-filename : close-port port
 
 ;; the following is just for fun. Not ready for production. You have been warned :-)
-;; use text without quotes in tags via ,(> any text )
+;; use text without quotes in tags via ,(>- any text )
 define : ->string x
        cond
          : symbol? x
@@ -786,7 +786,7 @@ define : ->string x
            . ""
          else
            format #f "~A" x
-define-syntax-rule : > . args
+define-syntax-rule : >- . args
   string-join
     map ->string : quasiquote args
     . " "
@@ -802,11 +802,17 @@ define : website-content port
              p "These are the fetch-pull statistics. They provide an estimate of lifetimes of real files in Freenet and a somewhat early warning when network quality should degrade."
              p "Realtime are 80 bytes. Small are 128 kiB. Bulk is 1MiB."
              p "Further details are explained below the diagrams."
+             h2 "Lifetime diagrams"
+             p "Compare the success count at different ages. The age before the success count drops marks the expected lifetime."
              ,@ map : λ (attributes) : ` p : img ,attributes
                '
                  @ (src "fetchpull-lifetime-realtime-success-count.png") (alt "lifetime plot: successes per month, realtime")
                  @ (src "fetchpull-lifetime-small-success-count.png") (alt "lifetime plot: successes per month, small bulk")
                  @ (src "fetchpull-lifetime-bulk-success-count.png") (alt "lifetime plot: successes per month, large bulk")
+             h2 "Download time and upload time plots"
+             p "Compare the time to retrieve or insert a file at different ages."
+             ,@ map : λ (attributes) : ` p : img ,attributes
+               '
                  @ (src "fetchpull-get-realtime.png") (alt "fetch-pull realtime download graph")
                  @ (src "fetchpull-get-small.png") (alt "fetch-pull small download graph")
                  @ (src "fetchpull-get-bulk.png") (alt "fetch-pull bulk download graph")
@@ -815,19 +821,30 @@ define : website-content port
                  @ (src "fetchpull-get-failed-bulk.png") (alt "fetch-pull failed bulk download graph")
                  @ (src "fetchpull-put.png") (alt "fetch-pull upload graph")
                  @ (src "fetchpull-put-failed.png") (alt "fetch-pull failed upload graph")
-                 @ (src "fetchpull-lifetime-realtime.png") (alt "lifetime plot: time per download, realtime")
-                 @ (src "fetchpull-lifetime-small.png") (alt "lifetime plot: time per download, small bulk")
-                 @ (src "fetchpull-lifetime-bulk.png") (alt "lifetime plot: time per download, large bulk")
-             h2 "explanation"
-             p "Files uploaded regularly with the download attempted after some delay. 
+                 ;; @ (src "fetchpull-lifetime-realtime.png") (alt "lifetime plot: time per download, realtime")
+                 ;; @ (src "fetchpull-lifetime-small.png") (alt "lifetime plot: time per download, small bulk")
+                 ;; @ (src "fetchpull-lifetime-bulk.png") (alt "lifetime plot: time per download, large bulk")
+             h2 "Explanation"
+             h3 "Uploads and settings"
+             p "The files are uploaded regularly. Downloads are attempted after some delay.
 Realtime is uploaded with realtime priority, small and bulk with bulk priority. 
 Details are available in fetchpull.w (see sources)"
-             p "Realtime is a raw KSK without any redirect. Size 80 bytes, Uploaded and downloaded in realtime mode without compression, using all tricks to reduce latency. This is the fake chat-message: What you would use for interactive status updates and such."
-             p "Small is a KSK splitfile (a KSK that has the links to about 7 CHKs, needs 3-4). Size 128kiB uncompressed, around 80kiB compressed, Uploaded and downloaded in bulk mode."
-             p "Bulk is a KSK which forwards to a CHK splitfile that has around 40 blocks, needs about 20 to download. Size 1MiB uncompressed, around 650kiB compressed, uploaded and downloaded in bulk mode."
+             ul
+               li "Realtime is a raw KSK without any redirect. Size 80 bytes, Uploaded and downloaded in realtime mode without compression, using all tricks to reduce latency. This is the fake chat-message: What you would use for interactive status updates and such."
+               li "Small is a KSK splitfile (a KSK that has the links to about 7 CHKs, needs 3-4). Size 128kiB uncompressed, around 80kiB compressed, Uploaded and downloaded in bulk mode."
+               li "Bulk is a KSK which forwards to a CHK splitfile that has around 40 blocks, needs about 20 to download. Size 1MiB uncompressed, around 650kiB compressed, uploaded and downloaded in bulk mode. These fetchpullstats need about 1 MiB."
+             h3 "Understanding the lifetime diagrams"
+             p "On the y-axis you have the days since the upload. That means: A file is uploaded (as KSK) and then downloaded that many days later. So for example the crosses in the top line of 2019 are downloaded 128 days after they have been inserted."
+             p "The successes are aggregated per month and the color gives you the number of successful downloads in the month."
+             p "If you look at the '1 day after insert' line, you get the total number of files inserted in that month. For 2019-07 that’s for example around 80. Now you can look upwards how many downloads succeeded with longer delay."
+             p "By comparing the color at the 128-day line (above 3 months) with the color at the 1-day line, you can see how many inserts are still alive after 128 days. You can tell from that after how many days the success-count breaks down."
+             p "That we have a line at 256 for " (b "realtime") " with colors almost equal to the 64 day line means that those files are still available after 256 days."
+             p "For the " (b "small") " graph: You can see that the colors above and below the 2 weeks line are almost equal. That means that a file of 128kiB lives for at least 16 days without being accessed. Above that you see the success counts slowly falling off: more and more of the blocks are overwritten, so there’s a chance for the files to be down. After 32 days around 50 out of 80 files are still available. After 64 days, around 30 out of 80 files are still there. After 128 days most are gone."
+             p "Going to the " (b "bulk") " line you see a slightly different pattern: there is no visible difference between 4 days and 8 days, so lifetime of 1MiB files is at least 8 days, but you already see some reduction at 16 days. At 32 days up to 40 out of 80 files are still alive, but almost none survive for 64 days. The reason for that is that a 1MiB file has an intermediate CHK splitfile and once the single top-key falls out, it is dead."
+             h3 "This site"
              p "This page is generated by running " : code "./fetchpull.w --site fetchpullstats"
                  ;; the following is just for fun. Not ready for production. You have been warned :-)
-                 . " " ,(> and then ,(string-append "uploaded" " " "with") freesitemgr (from pyFreenet ,{1 + 2}) as freesite.)
+                 . " " ,(>- and then ,(string-append "uploaded" " " "with") freesitemgr (from pyFreenet ,{1 + 2}) as freesite.)
                  br
                  . "Feel free to create your own version."
              h2 "Sources"
@@ -878,6 +895,7 @@ define : copy-resources-to path
             loop : cdr files
     ;; simply copy over the plot and plotting script
     ;; FIXME: the resulting images can be empty, need to copy them manually.
+    sleep 3
     let loop : (files '("fetchpull.w" "fetchpull-plot.gnuplot" "fetchpull-get-realtime.png" "fetchpull-get-small.png" "fetchpull-get-bulk.png" "fetchpull-get-failed-realtime.png" "fetchpull-get-failed-small.png" "fetchpull-get-failed-bulk.png" "fetchpull-put.png" "fetchpull-put-failed.png" "fetchpull-lifetime-realtime.png" "fetchpull-lifetime-small.png" "fetchpull-lifetime-bulk.png" "fetchpull-lifetime-realtime-success-count.png" "fetchpull-lifetime-small-success-count.png" "fetchpull-lifetime-bulk-success-count.png"))
         when : not : null? files
             when : file-exists? : first files
@@ -909,7 +927,7 @@ define : create-site path
     write-site-to path
 
 define : final-action? args
-   if {(length args) > 1}
+   if {(length args) >= 2}
      cond 
        : equal? "--help" : second args
          help args
@@ -922,7 +940,7 @@ define : final-action? args
          test
          . #t
        : equal? "--site" : second args
-         create-site : if {(length args) > 2} (third args) "site"
+         create-site : if {(length args) >= 3} (third args) "site"
          . #t
        else #f
      . #f
@@ -930,7 +948,7 @@ define : final-action? args
     
 define : main args
   when : not : final-action? args
-    when {(length args) > 1}
+    when {(length args) >= 2}
          pretty-print : second args
          set! today : iso->time : second args
     ;; processor-put! printing-passthrough-processor