Pablo Ochoa
Pablo Ochoa

Reputation: 137

Corruption of Portainer's DB

I have a deployment of Portainer 2.14.2 and Docker Engine 20.10.7. It has been functional for quite a few months. Today I had some problems: the Portainer container (the one in charge of the UI, not the agent) kept restarting. During one of those restarts, for an unknown reason, the database was corrupted. Logs:

 time="2022-10-19T10:59:15Z" level=info msg="Encryption key file `portainer` not present"
 time="2022-10-19T10:59:15Z" level=info msg="Proceeding without encryption key"
 time="2022-10-19T10:59:15Z" level=info msg="Loading PortainerDB: portainer.db"
 panic: page 8 already freed

 goroutine 35 [running]:
 go.etcd.io/bbolt.(*freelist).free(0xc000728600, 0xb175, 0x7f104c311000)
       /tmp/go/pkg/mod/go.etcd.io/bbolt@v1.3.6/freelist.go:175 +0x2c8
 go.etcd.io/bbolt.(*node).spill(0xc000152070)
       /tmp/go/pkg/mod/go.etcd.io/bbolt@v1.3.6/node.go:359 +0x216
 go.etcd.io/bbolt.(*node).spill(0xc000152000)
       /tmp/go/pkg/mod/go.etcd.io/bbolt@v1.3.6/node.go:346 +0xaa
 go.etcd.io/bbolt.(*Bucket).spill(0xc00013e018)
       /tmp/go/pkg/mod/go.etcd.io/bbolt@v1.3.6/bucket.go:570 +0x33f
 go.etcd.io/bbolt.(*Tx).Commit(0xc00013e000)
       /tmp/go/pkg/mod/go.etcd.io/bbolt@v1.3.6/tx.go:160 +0xe7
 go.etcd.io/bbolt.(*DB).Update(0xc0001f1000?, 0xc000134ef8)
       /tmp/go/pkg/mod/go.etcd.io/bbolt@v1.3.6/db.go:748 +0xe5
 go.etcd.io/bbolt.(*batch).run(0xc00031c000)
       /tmp/go/pkg/mod/go.etcd.io/bbolt@v1.3.6/db.go:856 +0x126
 sync.(*Once).doSlow(0x0?, 0x1?)
       /opt/hostedtoolcache/go/1.18.3/x64/src/sync/once.go:68 +0xc2
 sync.(*Once).Do(...)
       /opt/hostedtoolcache/go/1.18.3/x64/src/sync/once.go:59
 go.etcd.io/bbolt.(*batch).trigger(0xc000321a00?)
       /tmp/go/pkg/mod/go.etcd.io/bbolt@v1.3.6/db.go:838 +0x45
 created by time.goFunc
       /opt/hostedtoolcache/go/1.18.3/x64/src/time/sleep.go:176 +0x32

My hypothesis is that during one of those restarts the container was stopped in the middle of a write operation (although I am not 100% sure). This is the first time this has happened to me, and I don't know how to recover from this state without deploying a new Portainer stack or erasing the whole database, either of which would be a really drastic solution.

If it helps, this is the docker-compose file:

# Compose file format version (quoted so it stays a string).
version: "3.8"

# The "net" network is marked external: it is expected to exist already
# and is not created by this file. Both services attach to it.
networks:
    net:
        external: true

services:
    # Portainer agent, pinned to the 2.14.2-alpine image tag.
    agent:
        image: portainer/agent:2.14.2-alpine
        environment:
            # Address and port for the agent; the portainer service's command
            # connects to the same host/port pair (tcp://tasks.agent:9001).
            AGENT_CLUSTER_ADDR: tasks.agent
            AGENT_PORT: 9001
        volumes:
            # Docker socket, mounted read-only (:ro).
            - /var/run/docker.sock:/var/run/docker.sock:ro
            # Host's Docker volumes directory, mounted at the same path.
            - /var/lib/docker/volumes:/var/lib/docker/volumes
        networks:
            - net
        deploy:
            # Global mode: one agent task per swarm node.
            mode: global
            restart_policy:
                # Restart only when the container exits with an error.
                condition: on-failure

    # Portainer server (UI), pinned to the 2.14.2-alpine image tag.
    portainer:
        image: portainer/portainer-ce:2.14.2-alpine
        # Connects to the agent at tasks.agent:9001 and reads the admin
        # password from the portainer_secret file mounted under /run/secrets.
        command: -H tcp://tasks.agent:9001 --tlsskipverify --admin-password-file=/run/secrets/portainer_secret
        ports:
            - "9000:9000"
            - "8000:8000"
        volumes:
            # Host bind mount for /data.
            # NOTE(review): presumably this is where portainer.db lives, so the
            # database is tied to this host path - confirm.
            - "/var/volumes/portainer/data:/data"
        networks:
            - net
        secrets:
            - portainer_secret
            # CA certificate secret exposed at a fixed path inside the container.
            - source: ca_cert_secret
              target: /etc/ssl/certs/localCA.pem
        deploy:
            # Single replica, restarted only on failure.
            mode: replicated
            replicas: 1
            restart_policy:
                condition: on-failure
            placement:
                constraints:
                    # Only schedule on nodes carrying the label stateful=true.
                    - node.labels.stateful == true
            # Traefik routing labels for the "portainer" router and service.
            labels:
                - "traefik.enable=true"
                - "traefik.passHostHeader=true"
                - "traefik.http.routers.portainer.rule=Host(`portainer`)"
                - "traefik.http.services.portainer.loadbalancer.server.port=9000"
                # NOTE(review): the ...portainer.entrypoints key is set twice in
                # this list (web here, web-secure at the end). Labels are
                # key/value pairs, so presumably only one value takes effect -
                # confirm which is intended.
                - "traefik.http.routers.portainer.entrypoints=web"
                - "traefik.http.routers.portainer.service=portainer"
                - "traefik.http.routers.portainer.tls=true"
                - "traefik.http.routers.portainer.entrypoints=web-secure"

# Both secrets are marked external: they are expected to exist already
# and are not created by this file. The portainer service consumes them.
secrets:
    portainer_secret:
        external: true
    ca_cert_secret:
        external: true

Upvotes: 0

Views: 929

Answers (0)

Related Questions