Varnish doesn't keep serving cache when the backends are down

Question

I have Varnish running behind HAProxy and in front of NGINX. The HAProxy server deals with SSL, then forwards the traffic on port 80 to Varnish, which in turn refers to the NGINX servers.

I'd like to set Varnish up so that if the NGINX servers are all down, it continues to serve the cached content until they're back up. But I don't seem to be able to get it quite right. I'm running varnish-6.0.8 revision 97e54ada6ac578af332e52b44d2038bb4fa4cd4a. My VCL version is 4.1.

Here is my configuration (sanitized):

vcl 4.1;
import directors;
import std;

# Define Server A
    backend server-a {
        .host = "xx.xx.xx.xx";
        .port = "80";
        .max_connections = 100;
        .probe = {
            .url = "/varnish-check.txt";
            .timeout = 1s;
            .interval = 5s;
            .window = 5;
            .threshold = 3;
}
}
# Define Server B
    backend server-b {
        .host = "xx.xx.xx.xx";
        .port = "80";
        .max_connections = 100;
        .probe = {
            .url = "/varnish-check.txt";
            .timeout = 1s;
            .interval = 5s;
            .window = 5;
            .threshold = 3;
}
}

# Define Server C
    backend server-c {
        .host = "xx.xx.xx.xx";
        .port = "8080";
        .max_connections = 100;
        .probe = {
            .url = "/varnish-check.txt";
            .timeout = 1s;
            .interval = 5s;
            .window = 5;
            .threshold = 3;
}
}

sub vcl_recv {
    if (req.http.host == "example.com" || req.http.host == "example2.com") {
    set req.backend_hint = server_b.backend();
} 
    elseif (req.http.host == "example3.com") {
    set req.backend_hint = server_c.backend();
}
    else {
    set req.backend_hint = server_a.backend();
}
}

acl purge {
    "localhost";
    "127.0.0.1";
    "::1";
    "xx.xx.xx.xx";
    "";
}

sub vcl_recv {

    set req.http.X-Forwarded-For = regsub(req.http.X-Forwarded-For,"^([^,]+)(,[^,]+)*","\1");

    if (req.method == "PURGE") {
    if (!client.ip ~ purge) {
    return (synth(405, "This IP is not allowed to send PURGE requests."));
}
    if (req.http.X-Purge-Method == "regex") {
    ban("obj.http.x-url ~ " + req.url + " && obj.http.x-host ~ " + req.http.host);
    return (synth(200, "Banned"));
}
    return (purge);
}
# Wordpress: don't cache these special pages
    if (req.url ~ "(wp-admin|post\.php|edit\.php|wp-login)") {
    return(pass);
}
# Wordpress: don't cache users who are logged-in or on password-protected pages
    if (req.http.Cookie ~ "wordpress_logged_in_|resetpass|wp-postpass_") {
    return(pass);
}
# Remove cookies
    set req.http.Cookie = regsuball(req.http.Cookie, "comment_author_[a-zA-Z0-9_]+", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "has_js=[^;]+(; )?", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "wp-settings-1=[^;]+(; )?", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "wp-settings-time-1=[^;]+(; )?", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "wordpress_test_cookie=[^;]+(; )?", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "PHPSESSID=[^;]+(; )?", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "__utm.=[^;]+(; )?", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "_ga=[^;]+(; )?", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "utmctr=[^;]+(; )?", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "utmcmd.=[^;]+(; )?", "");
    set req.http.Cookie = regsuball(req.http.Cookie, "utmccn.=[^;]+(; )?", "");
# Remove proxy header (see https://httpoxy.org/#mitigate-varnish)
    unset req.http.proxy;
# Normalize query arguments (sort alphabetically)
    set req.url = std.querysort(req.url);
# Strip trailing ? if it exists
    if (req.url ~ "\?$") {
    set req.url = regsub(req.url, "\?$", "");
}
# Limit requests to the following types
    if (req.method !~ "^GET|HEAD|PUT|POST|TRACE|OPTIONS|PATCH|DELETE$") {
    return (pipe);
}
# Only cache GET or HEAD requests to ensure that POST requests are always passed through, along with their cookies
    if (req.method != "GET" && req.method != "HEAD") {
    return (pass);
}
# Don't cache AJAX requests
    if (req.http.X-Requested-With == "XMLHttpRequest") {
    return(pass);
}
# Don't cache images and PDFs
    if (req.url ~ "\.(gif|jpg|jpeg|bmp|png|pdf)$") {
    return(pass);
}
# Don't cache large files (zip, audio, video, etc.)
    if (req.url ~ "^[^?]*\.(7z|avi|bz2|flac|flv|gz|mka|mkv|mov|mp3|mp4|mpeg|mpg|ogg|ogm|opus|rar|tar|tgz|tbz|txz|wav|webm|wmv|xz|zip)(\?.*)?$") {
    return (pipe);
}
# Add support for ESI
    if (req.http.Authorization) {
    return (pass);
}

# Wordpress: don't cache search results
    if (req.url ~ "/\?s=") {
    return (pass);
}
# Wordpress: don't cache REST API (hand-rolled APIs used by custom themes)
    if (req.url ~ "/shared-gc/includes/rest-api/") {
    return (pass);
}
# Wordpress: don't cache anything with a cache-breaking v= parameter (see gc.loadCachedJSON() JS function)
    if (req.url ~ "(\?|&)v=0") {
    return (pass);
}
# Don't cache the special pages we use to generate PDFs from the Wordpress catalog site
    if (req.url ~ "/generate-catalog/") {
    return (pass);
}
# Respect the browser's desire for a fresh copy on hard refresh. This ban will only work if there are no further URL changes (e.g. set req.url = ...) after it
    if (req.http.Cache-Control == "no-cache") {
    ban("req.http.host == " + req.http.host + " && req.url == " + req.url);
}
# Are there cookies left with only spaces or that are empty?
    if (req.http.cookie ~ "^\s*$") {
    unset req.http.cookie;
}
# Remove all cookies to enable caching
    unset req.http.Cookie;
    return (hash);
}

sub vcl_hash {

# Ignore marketing-related url parameters when caching urls
    set req.http.newUrl = req.url;
    if (req.http.newUrl ~ "(\?|&)(utm_source|utm_medium|utm_campaign|utm_content|gclid|fbclid|cx|ie|cof|siteurl|gc_source|mkt_tok)=") {
    set req.http.newUrl = regsuball(req.http.newUrl, "&(utm_source|utm_medium|utm_campaign|utm_content|gclid|fbclid|cx|ie|cof|siteurl|gc_source|mkt_tok)=([A-z0-9_\-\.%25]+)", "");
    set req.http.newUrl = regsuball(req.http.newUrl, "\?(utm_source|utm_medium|utm_campaign|utm_content|gclid|fbclid|cx|ie|cof|siteurl|gc_source|mkt_tok)=([A-z0-9_\-\.%25]+)", "?");
    set req.http.newUrl = regsub(req.http.newUrl, "\?&", "?");
    set req.http.newUrl = regsub(req.http.newUrl, "\?$", "");
}
# Ignore hashes when caching urls
    if (req.http.newUrl ~ "\#") {
    set req.http.newUrl = regsub(req.http.newUrl, "\#.*$", "");
}
# Default vcl_hash, except replaced "req.url" with "req.http.newUrl"
    hash_data(req.http.newUrl);
    if (req.http.host) {
    hash_data(req.http.host);
}       else {
    hash_data(server.ip);
}
    return (lookup);
}

sub vcl_backend_response {
    set beresp.http.x-url = bereq.url;
    set beresp.http.x-host = bereq.http.host;
# Set the TTL for the cache to thirty days and the grace period to twelve hours
    set beresp.ttl = 30d;
    set beresp.grace = 12h;
    set beresp.keep = 24h;
# Set different TTLs for other hosts
#   if (bereq.url ~ "(example.com|secondexample.com)") {
#       set beresp.ttl = 30d;
#}
# Set 301 and 302 as uncacheable
    if (beresp.status == 301 || beresp.status == 302) {
    set beresp.http.Location = regsub(beresp.http.Location, ":[0-9]+", "");
# Don't cache redirects
    set beresp.uncacheable = true;
}
# Cache 404 responses for five minutes (can be cleared by hard refresh)
    if (beresp.status == 403 || beresp.status == 404)
{
    set beresp.ttl = 5m;
}
# Check for the response status of background fetches from backend, and return (abandon) if the response is a “5XX” errors
    if (bereq.is_bgfetch && beresp.status >= 500 && beresp.status <= 599) {
        return (abandon);
    }
}

sub vcl_deliver {

    unset resp.http.x-url;
    unset resp.http.x-host;
# Add debug header to see if it's a HIT/MISS and the number of hits, disable when not needed
    if (obj.hits > 0) {
    set resp.http.X-Cache = "HIT";
}       else {
    set resp.http.X-Cache = "MISS";
}

    set resp.http.X-Cache-Hits = obj.hits;  

# Remove headers to improve security
    unset resp.http.X-Varnish;
    unset resp.http.Via;
    unset resp.http.X-Powered-By;
    unset resp.http.Server;
}

sub vcl_init {
    return (ok);
}

sub vcl_fini {
    return (ok);
}

sub vcl_hit {

# If the object has a TTL equal to or greater than 0s, deliver it from the cache
    if (obj.ttl >= 0s) {
    return (deliver);
}
    
# Check whether Grace Mode is necessary 
    if (std.healthy(req.backend_hint)) {
    if (obj.ttl + 10s > 0s) {
    set req.http.grace = "normal(limited)";
    return (deliver);
}       else {

# If Grace Mode is not necessary, fetch a fresh object from the backend
    return(miss);
}
}       else {

# If Grace Mode is necessary (i.e. the backend is down), enter grace mode
    if (obj.ttl + obj.grace > 0s) {
        set req.http.grace = "full";
        return (deliver);
}       else {

# If there is no Grace Mode object available, send to the backend
    return (miss);
}
}
}

This doesn't seem to work, but I can't see why. If a backend server goes down, the Varnish server quickly returns a 503 or other error. Any pointers as to what I've got wrong would be appreciated.

Relatedly, I really like the ability to clear the cache with a hard browser refresh. But it strikes me that it would be great to be able to negate that rule if the backend is down. Any idea how I go about that?

And, of course, if there are any obvious errors in here, I'd love to hear about them.

Thanks!

Varnish doesn't keep serving cache when the backends are down

Answers (1)

Total object lifetime

How to leverage grace mode for stale if error

Using vmod_stale

Related Questions

Varnish doesn&#39;t keep serving cache when the backends are down

Answers (1)

Total object lifetime

How to leverage grace mode for stale if error

Using vmod_stale

Related Questions

Varnish doesn't keep serving cache when the backends are down