avalus
avalus

Reputation: 67

Failover not working in static cluster HA

I have problem with configuration two Artemis servers in cluster with HA.

Goals:

  1. Start two servers in cluster and one is in live state and second is backup. Only the live server should get messages when he is running.
  2. Two servers should shared configuration and messages(when one failed)

Problem:

When I stopped the live server using using ctrl-c the slave reported Connection failure to localhost/127.0.0.1:61616 has been detected: AMQ219015: The connection was disconnected because of server shutdown [code=DISCONNECTED] which is correct, but the backup server did not change his state and didn't listen on port 61617. So what I did wrong in config?

Live server config:

<?xml version='1.0'?>
<configuration xmlns="urn:activemq"
               xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
               xmlns:xi="http://www.w3.org/2001/XInclude"
               xsi:schemaLocation="urn:activemq /schema/artemis-configuration.xsd">

   <core xmlns="urn:activemq:core" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="urn:activemq:core ">

      <name>0.0.0.0</name>

      <persistence-enabled>true</persistence-enabled>

      <!-- this could be ASYNCIO, MAPPED, NIO
           ASYNCIO: Linux Libaio
           MAPPED: mmap files
           NIO: Plain Java Files
       -->

      <paging-directory>data/paging</paging-directory>

      <bindings-directory>data/bindings</bindings-directory>

      <journal-directory>data/journal</journal-directory>

      <large-messages-directory>data/large-messages</large-messages-directory>

      <security-enabled>false</security-enabled>

      <connectors>
         <connector name="netty-connector">tcp://localhost:61616</connector>
         <!-- connector to the server1 -->
         <connector name="server1-connector">tcp://localhost:61617</connector>
      </connectors>

      <!-- Acceptors -->
      <acceptors>
         <acceptor name="netty-acceptor">tcp://localhost:61616</acceptor>
      </acceptors>

      <ha-policy>
         <shared-store>
            <master>
               <check-for-live-server>true</check-for-live-server>
            </master>   
         </shared-store>
      </ha-policy>

      <cluster-connections>
         <cluster-connection name="my-cluster">
            <connector-ref>netty-connector</connector-ref>
            <retry-interval>500</retry-interval>
            <use-duplicate-detection>true</use-duplicate-detection>
            <message-load-balancing>STRICT</message-load-balancing>

            <max-hops>1</max-hops>

            <static-connectors>
               <connector-ref>server1-connector</connector-ref>
            </static-connectors>
         </cluster-connection>
      </cluster-connections>

      <security-settings>
          <security-setting match="#">
            <permission type="createNonDurableQueue" roles="amq"/>
            <permission type="deleteNonDurableQueue" roles="amq"/>
            <permission type="createDurableQueue" roles="amq"/>
            <permission type="deleteDurableQueue" roles="amq"/>
            <permission type="createAddress" roles="amq"/>
            <permission type="deleteAddress" roles="amq"/>
            <permission type="consume" roles="amq"/>
            <permission type="browse" roles="amq"/>
            <permission type="send" roles="amq"/>
            <!-- we need this otherwise ./artemis data imp wouldn't work -->
            <permission type="manage" roles="amq"/>
         </security-setting>
         <security-setting match="clusterTopic">
            <permission type="createNonDurableQueue" roles="amq"/>
            <permission type="deleteNonDurableQueue" roles="amq"/>
            <permission type="createDurableQueue" roles="amq"/>
            <permission type="deleteDurableQueue" roles="amq"/>
            <permission type="createAddress" roles="amq"/>
            <permission type="deleteAddress" roles="amq"/>
            <permission type="consume" roles="amq"/>
            <permission type="browse" roles="amq"/>
            <permission type="send" roles="amq"/>
            <!-- we need this otherwise ./artemis data imp wouldn't work -->
            <permission type="manage" roles="amq"/>
         </security-setting>
         <security-setting match="clusterQueue">
            <permission type="createNonDurableQueue" roles="amq"/>
            <permission type="deleteNonDurableQueue" roles="amq"/>
            <permission type="createDurableQueue" roles="amq"/>
            <permission type="deleteDurableQueue" roles="amq"/>
            <permission type="createAddress" roles="amq"/>
            <permission type="deleteAddress" roles="amq"/>
            <permission type="consume" roles="amq"/>
            <permission type="browse" roles="amq"/>
            <permission type="send" roles="amq"/>
            <!-- we need this otherwise ./artemis data imp wouldn't work -->
            <permission type="manage" roles="amq"/>
         </security-setting>
      </security-settings>

      <address-settings>
         <!-- if you define auto-create on certain queues, management has to be auto-create -->
         <address-setting match="activemq.management#">
            <dead-letter-address>DLQ</dead-letter-address>
            <expiry-address>ExpiryQueue</expiry-address>
            <redelivery-delay>0</redelivery-delay>
            <!-- with -1 only the global-max-size is in use for limiting -->
            <max-size-bytes>-1</max-size-bytes>
            <message-counter-history-day-limit>10</message-counter-history-day-limit>
            <address-full-policy>PAGE</address-full-policy>
            <auto-create-queues>true</auto-create-queues>
            <auto-create-addresses>true</auto-create-addresses>
            <auto-create-jms-queues>true</auto-create-jms-queues>
            <auto-create-jms-topics>true</auto-create-jms-topics>
         </address-setting>
         <!--default for catch all-->
         <address-setting match="#">
            <dead-letter-address>DLQ</dead-letter-address>
            <expiry-address>ExpiryQueue</expiry-address>
            <redelivery-delay>0</redelivery-delay>
            <!-- with -1 only the global-max-size is in use for limiting -->
            <max-size-bytes>-1</max-size-bytes>
            <message-counter-history-day-limit>10</message-counter-history-day-limit>
            <address-full-policy>PAGE</address-full-policy>
            <auto-create-queues>true</auto-create-queues>
            <auto-create-addresses>true</auto-create-addresses>
            <auto-create-jms-queues>true</auto-create-jms-queues>
            <auto-create-jms-topics>true</auto-create-jms-topics>
         </address-setting>
      </address-settings>

      <addresses>
         <address name="DLQ">
            <anycast>
               <queue name="DLQ" />
            </anycast>
         </address>
         <address name="ExpiryQueue">
            <anycast>
               <queue name="ExpiryQueue" />
            </anycast>
         </address>
      <address name="clusterTopic">
            <multicast>
               <queue name="clusterQueue" />
            </multicast>
         </address>
      </addresses>
   </core>
</configuration>

Backup server config:

<?xml version='1.0'?>
<configuration xmlns="urn:activemq"
               xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
               xmlns:xi="http://www.w3.org/2001/XInclude"
               xsi:schemaLocation="urn:activemq /schema/artemis-configuration.xsd">

   <core xmlns="urn:activemq:core" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="urn:activemq:core ">

        <name>0.0.0.0</name>

        <persistence-enabled>true</persistence-enabled>

        <!-- this could be ASYNCIO, MAPPED, NIO
           ASYNCIO: Linux Libaio
           MAPPED: mmap files
           NIO: Plain Java Files
       -->

        <paging-directory>../brokerC0/data/paging</paging-directory>

        <bindings-directory>../brokerC0/data/bindings</bindings-directory>

        <journal-directory>../brokerC0/data/journal</journal-directory>

        <large-messages-directory>../brokerC0/data/large-messages</large-messages-directory>

        <security-enabled>false</security-enabled>

        <connectors>
            <connector name="netty-connector">tcp://localhost:61617</connector>
            <!-- connector to the server0 -->
            <connector name="server0-connector">tcp://localhost:61616</connector>
        </connectors>

        <!-- Acceptors -->
        <acceptors>
            <acceptor name="netty-acceptor">tcp://localhost:61617</acceptor>
        </acceptors>

        <ha-policy>
           <shared-store>
              <slave>
                 <allow-failback>true</allow-failback>
              </slave>
           </shared-store>
        </ha-policy>

        <!-- Clustering configuration -->
        <cluster-connections>
            <cluster-connection name="my-cluster">
                <connector-ref>netty-connector</connector-ref>
                <retry-interval>500</retry-interval>
                <use-duplicate-detection>true</use-duplicate-detection>
                <message-load-balancing>STRICT</message-load-balancing>
                <max-hops>1</max-hops>
                <static-connectors>
                    <connector-ref>server0-connector</connector-ref>
                </static-connectors>
            </cluster-connection>
        </cluster-connections>

        <security-settings>
            <security-setting match="#">
                <permission type="createNonDurableQueue" roles="amq"/>
                <permission type="deleteNonDurableQueue" roles="amq"/>
                <permission type="createDurableQueue" roles="amq"/>
                <permission type="deleteDurableQueue" roles="amq"/>
                <permission type="createAddress" roles="amq"/>
                <permission type="deleteAddress" roles="amq"/>
                <permission type="consume" roles="amq"/>
                <permission type="browse" roles="amq"/>
                <permission type="send" roles="amq"/>
                <!-- we need this otherwise ./artemis data imp wouldn't work -->
                <permission type="manage" roles="amq"/>
            </security-setting>
            <security-setting match="clusterTopic">
                <permission type="createNonDurableQueue" roles="amq"/>
                <permission type="deleteNonDurableQueue" roles="amq"/>
                <permission type="createDurableQueue" roles="amq"/>
                <permission type="deleteDurableQueue" roles="amq"/>
                <permission type="createAddress" roles="amq"/>
                <permission type="deleteAddress" roles="amq"/>
                <permission type="consume" roles="amq"/>
                <permission type="browse" roles="amq"/>
                <permission type="send" roles="amq"/>
                <!-- we need this otherwise ./artemis data imp wouldn't work -->
                <permission type="manage" roles="amq"/>
            </security-setting>
            <security-setting match="clusterQueue">
                <permission type="createNonDurableQueue" roles="amq"/>
                <permission type="deleteNonDurableQueue" roles="amq"/>
                <permission type="createDurableQueue" roles="amq"/>
                <permission type="deleteDurableQueue" roles="amq"/>
                <permission type="createAddress" roles="amq"/>
                <permission type="deleteAddress" roles="amq"/>
                <permission type="consume" roles="amq"/>
                <permission type="browse" roles="amq"/>
                <permission type="send" roles="amq"/>
                <!-- we need this otherwise ./artemis data imp wouldn't work -->
                <permission type="manage" roles="amq"/>
            </security-setting>
        </security-settings>

        <address-settings>
            <!-- if you define auto-create on certain queues, management has to be auto-create -->
            <address-setting match="activemq.management#">
                <dead-letter-address>DLQ</dead-letter-address>
                <expiry-address>ExpiryQueue</expiry-address>
                <redelivery-delay>0</redelivery-delay>
                <!-- with -1 only the global-max-size is in use for limiting -->
                <max-size-bytes>-1</max-size-bytes>
                <message-counter-history-day-limit>10</message-counter-history-day-limit>
                <address-full-policy>PAGE</address-full-policy>
                <auto-create-queues>true</auto-create-queues>
                <auto-create-addresses>true</auto-create-addresses>
                <auto-create-jms-queues>true</auto-create-jms-queues>
                <auto-create-jms-topics>true</auto-create-jms-topics>
            </address-setting>
            <!--default for catch all-->
            <address-setting match="#">
                <dead-letter-address>DLQ</dead-letter-address>
                <expiry-address>ExpiryQueue</expiry-address>
                <redelivery-delay>0</redelivery-delay>
                <!-- with -1 only the global-max-size is in use for limiting -->
                <max-size-bytes>-1</max-size-bytes>
                <message-counter-history-day-limit>10</message-counter-history-day-limit>
                <address-full-policy>PAGE</address-full-policy>
                <auto-create-queues>true</auto-create-queues>
                <auto-create-addresses>true</auto-create-addresses>
                <auto-create-jms-queues>true</auto-create-jms-queues>
                <auto-create-jms-topics>true</auto-create-jms-topics>
            </address-setting>
        </address-settings>

        <addresses>
            <address name="DLQ">
                <anycast>
                    <queue name="DLQ" />
                </anycast>
            </address>
            <address name="ExpiryQueue">
                <anycast>
                    <queue name="ExpiryQueue" />
                </anycast>
            </address>
            <address name="clusterTopic">
                <multicast>
                    <queue name="clusterQueue" />
                </multicast>
            </address>
        </addresses>
   </core>
</configuration>

Upvotes: 0

Views: 1175

Answers (1)

Justin Bertram
Justin Bertram

Reputation: 35217

If you stop the broker gracefully (e.g. with ctrl-c) then failover will not occur by default. To trigger failover with a graceful shutdown you need to configure the master broker like so:

<ha-policy>
   <shared-store>
      <master>
         <failover-on-shutdown>true</failover-on-shutdown>
      </master>  
   </shared-store>
</ha-policy>

And the slave like so:

<ha-policy>
   <shared-store>
      <slave>
         <failover-on-shutdown>true</failover-on-shutdown>
      </slave>  
   </shared-store>
</ha-policy>

Otherwise you can just kill the broker (e.g. using kill -9 <pid>) to trigger failover.

Upvotes: 1

Related Questions