2 Revīzijas 128b6a69ed ... d59a95da38

Autors SHA1 Ziņojums Datums
  ohnonot d59a95da38 Merge branch 'master' into 'master' 7 mēneši atpakaļ
  Pierre Métras d381652020 Invalidate cached HTML file when source page has changed 7 mēneši atpakaļ
2 mainītis faili ar 52 papildinājumiem un 19 dzēšanām
  1. 43 17
      PicoZCache.php
  2. 9 2
      README.md

+ 43 - 17
PicoZCache.php

@@ -6,6 +6,7 @@
  * @author Maximilian Beck before 2.0, Nepose since 2.0
  * @link https://github.com/Nepose/PicoCache
  * Improvements by various authors, gathered by https://github.com/ohnonot (2022)
+ * PME20240329 Cache invalidate on changed source
  * @license http://opensource.org/licenses/MIT
  * @version 2.0
  */
@@ -17,6 +18,7 @@ class PicoZCache extends AbstractPicoPlugin
 
     private $doCache = true;
     private $FileName;
+    private $url;
     protected $enabled = false;
 
     public function onConfigLoaded(array &$config)
@@ -28,41 +30,66 @@ class PicoZCache extends AbstractPicoPlugin
         $this->Exclude = $this->getPluginConfig('exclude', null);
         $this->ExcludeRegex = $this->getPluginConfig('exclude_regex', null);
         $this->IgnoreQuery = $this->getPluginConfig('ignore_query', false);
-        $this->IgnoreQueryExclude = $this->getPluginConfig('ignore_query_exclude', null);
+	$this->IgnoreQueryExclude = $this->getPluginConfig('ignore_query_exclude', null);
+	$this->Invalidate = $this->getPluginConfig('invalidate', true);
     }
 
     public function onRequestUrl(&$url)
     {
-        $name = $url == "" ? "index" : $url;
+	$this->url = $url;
+    }
+
+    public function onRequestFile(&$file)
+    {
+        $name = $this->url == "" ? "index" : $this->url;
 
         // Skip cache for url matching an excluded page
-    	if($this->Exclude && in_array($name,$this->Exclude)) {
+    	if ($this->Exclude && in_array($name, $this->Exclude)) {
             $this->doCache = false;
             return;
-        }
-        // Skip cache for url matching exclude regex // untested!
-        if ($this->ExcludeRegex && preg_match($this->ExcludeRegex, $url)) {
+	}
+
+	// Skip cache for url matching exclude regex // untested!
+	//PME20240336 Regex needs '/' delimiters.
+        if ($this->ExcludeRegex && preg_match('/' . preg_quote($this->ExcludeRegex, '/') . '/', $name)) {
             $this->doCache = false;
             return;
-        }
-        // add query to name if so configured
-    	if( $this->IgnoreQuery === false || in_array($name,$this->IgnoreQueryExclude) ) {
+	}
+
+        // Add query to name if so configured
+    	if ($this->IgnoreQuery === false || in_array($name, $this->IgnoreQueryExclude)) {
             $query = (!empty($_GET)) ? '__'.md5(serialize($_GET)) : null;
-            $name = $name.$query;
+            $name = $name . $query;
         }
 
-        //replace any character except numbers and digits with a '-' to form valid file names
-        $this->FileName = $this->Dir . preg_replace('/[^A-Za-z0-9_\-]/', '_', $name) . '.html';
+        // Replace any character except numbers and digits with a '-' to form valid file names
+	//PME20230326 replace every path delimiter with a '+' to prevent cache clashes.
+	// Example: a/b_c.md and a_b/c.md are cached with the same name a_b_c.html.
+	// Now they become a+b_c.html and a_b+c.html respectively.
+	$seq = explode('/', $name);
+	$seq = array_map(fn($x): string => preg_replace('/[^A-Za-z0-9_\-]/', '_', $x), $seq);
+	$this->FileName = $this->Dir . implode('+', $seq) . '.html';
 
-        //if a cached file exists and the cacheTime is not expired, load the file and exit
-        if (file_exists($this->FileName)) {
+	// If invalidate cached file when source page has changed, delete stale
+	// cached copy when source page is more recent
+	if (file_exists($this->FileName) && $this->Invalidate && filemtime($file) > filemtime($this->FileName)) {
+	    // Delete stale cache copy
+            // echo 'filemtime(source) = ' . filemtime($file) . '<br/>';
+            // echo 'filemtime(cached) = ' . filemtime($this->FileName).'<br/>';
+            // echo 'time = '. time().'<br/>';
+            // echo 'this->Time = ' . $this->Time.'<br/>';
+            unlink($this->FileName);
+	}
+
+        // If a cached file exists and the cacheTime is not expired, load the file and exit
+	if (file_exists($this->FileName)) {
             if ($this->Time > 0) {
                 //~ echo time().'<br/>';
                 //~ echo filemtime($this->FileName).'<br/>';
                 //~ echo $this->Time.'<br/>';
                 header("Expires: " . gmdate("D, d M Y H:i:s", $this->Time + filemtime($this->FileName)) . " GMT");
                 ($this->XHTML) ? header('Content-Type: application/xhtml+xml') : header('Content-Type: text/html');
-                if(time() - filemtime($this->FileName) > $this->Time) return;
+                if (time() - filemtime($this->FileName) > $this->Time) return;
             }
             die(readfile($this->FileName));
         }
@@ -83,5 +110,4 @@ class PicoZCache extends AbstractPicoPlugin
             file_put_contents($this->FileName, $output);
         }
     }
-
-}
+}

+ 9 - 2
README.md

@@ -27,6 +27,7 @@ PicoZCache:
 
     dir: content/zcache        # Directory where cache should be saved. If your PHP has permission to do so,
                                # this can be a directory outside your webroot (leading / vs. no leading /).
+    invalidate: true           # When source page is modified, the cached HTML file is updated.
 
     expires: 0                 # Interval between caching (cached files will be overwritten after that many seconds,
                                # also sends Expires header to client). The default, 0, means cached files are never overwritten.
@@ -51,9 +52,11 @@ PicoZCache:
 
 ## Cache clearing
 
-To clear the cache, remove the files from the cache folder, or delete the whole cache folder (it will be recreated).
+With the `invalidate` config setting enabled, a cached HTML file is automatically refreshed when its source page has changed. Only that stale cached file is deleted; it is regenerated the next time the source page is accessed.
 
-I recommend to set up a little daemon script that will clear out the cache whenever content changes, e.g.:
+If you want to clear the whole cache, remove the files from the cache folder, or delete the whole cache folder (it will be recreated).
+
+Or, if you want to clear the whole cache whenever a single source file has changed, you can set up a little daemon script that clears out the cache on every content change. This can be necessary when some pages are created from multiple source files:
 
 ~~~ sh
 #!/bin/sh
@@ -78,6 +81,10 @@ delete "$result"
 done
 ~~~
 
+## Cache warming
+
+You can preload the cache directory with a `wget` call like `wget --spider --force-html --show-progress --recursive --no-parent https://www.example.com/`
+
 ## Common Pitfalls
 
 + Make sure the directory in which the cache directory shall be created has the appropriate permissions.