rajkumar11
rajkumar11

Reputation: 95

how to do proper indexing while joining in couchbase?

i have one couchbase document in the format as below of type "organization" in contact bucket.

"contact": {
  "extendedData": [],
  "id": "organization_2_1094",
  "organizationId": 1094,
  "organizationName": "SMART COMSSS",
  "organizationRoles": [
    {
      "addressAssociations": [
        {
          "activeDate": "2019-08-08T03:51:51.417Z",
          "addressAssocTypeId": -2,
          "addressId": 749,
          "ownershipStatus": 1,
          "srvAddressStatus": 1
        }
      ],
      "extendedData": [
        {
          "characteristicId": "1",
          "characteristicValue": "Plant_Id",
          "extendedDataId": "400"
        },
        {
          "characteristicId": "2",
          "characteristicValue": "Plant_Type",
          "extendedDataId": "401"
        }
      ],
      "name": "Store1",
      "organizationRoleId": 928,
      "roleSpecId": -103,
      "statusId": 1,
      "statusLastChangedDate": "2019-08-22T12:27:51.077Z"
    },
    {
      "name": "changed",
      "organizationRoleId": 929,
      "roleSpecId": -104,
      "statusId": 1,
      "statusLastChangedDate": "2019-08-22T12:27:51.077Z"
    },
    {
      "addressAssociations": [
        {
          "activeDate": "2019-08-08T23:06:49.748Z",
          "addressAssocTypeId": -2,
          "addressId": 752,
          "ownershipStatus": 1,
          "srvAddressStatus": 1
        }
      ],
      "extendedData": [
        {
          "characteristicId": "1",
          "characteristicValue": "Plant_Id",
          "extendedDataId": "402"
        },
        {
          "characteristicId": "2",
          "characteristicValue": "Plant_Type",
          "extendedDataId": "403"
        }
      ],
      "name": "store11",
      "organizationRoleId": 930,
      "partyRoleAssocs": [
        {
          "partyRoleAssocId": "531"
        }
      ],
      "relevantEntityId": "S_103",
      "roleSpecId": -103,
      "statusId": 1,
      "statusLastChangedDate": "2019-08-22T12:27:51.077Z"
    },
    {
      "name": "new store group",
      "organizationRoleId": 931,
      "partyRoleAssocs": [
        {
          "partyRoleAssocId": "532"
        }
      ],
      "roleSpecId": -103,
      "statusId": 1,
      "statusLastChangedDate": "2019-08-22T12:27:51.077Z"
    },
    {
      "name": "child store",
      "organizationRoleId": 932,
      "partyRoleAssocs": [
        {
          "partyRoleAssocId": "533"
        }
      ],
      "roleSpecId": -103,
      "statusId": 1,
      "statusLastChangedDate": "2019-08-22T12:27:51.077Z"
    },
    {
      "name": "MCOTEST",
      "organizationRoleId": 933,
      "roleSpecId": -104,
      "statusId": 1,
      "statusLastChangedDate": "2019-08-22T12:27:51.077Z"
    },
    {
      "name": "new store child",
      "organizationRoleId": 934,
      "partyRoleAssocs": [
        {
          "partyRoleAssocId": "534"
        }
      ],
      "roleSpecId": -103,
      "statusId": 1,
      "statusLastChangedDate": "2019-08-22T12:27:51.077Z"
    },
    {
      "name": "new store child",
      "organizationRoleId": 935,
      "partyRoleAssocs": [
        {
          "partyRoleAssocId": "535"
        },
        {
          "partyRoleAssocId": "565",
          "toRoleId": 936
        }
      ],
      "roleSpecId": -103,
      "statusId": 1,
      "statusLastChangedDate": "2019-08-22T12:27:51.077Z"
    },
    {
      "organizationRoleId": 936,
      "partyRoleAssocs": [
        {
          "toRoleId": 935
        }
      ],
      "statusId": 1,
      "statusLastChangedDate": "2019-08-22T12:27:51.077Z"
    }
  ],
  "statusId": 1,
  "statusLastChangedDate": "2019-08-22T09:27:51.072Z",
  "tenantId": "2",
  "type": "organization"
  }

and another couchbase document of type "address" in bucket contact

 {
"contact": {
  "address1": "stret",
  "addressId": "1000",
  "city": "miryalguda",
  "countryCode": 4,
  "id": "address_2_1000",
  "state": "ap",
  "tenantId": "2",
  "type": "address",
  "zip": "500070"
       }
}

Now i have made a query to get some data from organization document and some data from address document by joining documents on addressId as per my requirement. Below is the query:

SELECT orgrole.name, orgrole.organizationRoleId,orgrole.externalIds as externalIds,orgrole.roleSpecId,a.city as city, a.zip as zip,{a.addressId,a.address1,a.address2,a.address3,a.country,a.city, a.zip,a.state,a.postalCode,a.houseNumber,a.streetName,a.fxGeocode,a.isActive} as address FROM `optima_contact` AS contact UNNEST contact.organizationRoles AS orgrole UNNEST orgrole.addressAssociations AS aa JOIN `optima_contact` AS a ON aa.addressId = TO_NUMBER(a.addressId)
WHERE  contact.type = "organization" AND a.type = "address"

and i have made one index for right hand side of join.

 CREATE INDEX `ix2` ON `optima_contact`(`addressId`,`address1`,`address2`,`address3`,`country`,`city`,`zip`,`state`,`postalCode`,`houseNumber`,`streetName`,`fxGeocode`,`isActive`) WHERE (`type` = "address")

Is there any way to still improve the query execution time for this? As the number of documents for type organization are 10 and for type "address" are more than 70000, i am getting timeout while query execution

Upvotes: 2

Views: 160

Answers (1)

vsr
vsr

Reputation: 7414

https://blog.couchbase.com/ansi-join-support-n1ql/

Options :

Change query to ON TO_STRING(aa.addressId) = a.addressId
Or change index to CREATE INDEX `ix3` ON `optima_contact`(TO_NUMBER(`addressId`), `addressId`,`address1`,`address2`,`address3`,`country`,`city`,`zip`,`state`,`postalCode`,`houseNumber`,`streetName`,`fxGeocode`,`isActive`) WHERE (`type` = "address")
Use Hash JOIN in EE. 
Change the JOIN order
Try one of the following 

First one should solve your problem. ON clause exact matches with RHS index key it will push the value otherwise it needs to fetch all the index keys and apply predicate on query side.

CB 6.5.0

WITH contact AS (SELECT orgrole.name, orgrole.organizationRoleId,
                        orgrole.externalIds AS externalIds, orgrole.roleSpecId,
                        aa.addressId
                 FROM optima_contact AS c
                 UNNEST c.organizationRoles AS orgrole
                 UNNEST orgrole.addressAssociations AS aa WHERE c.type = "organization")
SELECT {a.addressId,a.address1,a.address2,a.address3,a.country,a.city,
        a.zip,a.state,a.postalCode,a.houseNumber,a.streetName,a.fxGeocode,a.isActive} AS address,
        c1.name, c1.organizationRoleId,
        c1.externalIds, c1.roleSpecId, a.city AS city, a.zip AS zip
FROM optima_contact AS a
UNNEST contact AS c1
WHERE a.type = "address" AND TO_NUMBER(a.addressId) = c1.addressId;

PRE 6.5

SELECT {a.addressId,a.address1,a.address2,a.address3,a.country,a.city,
        a.zip,a.state,a.postalCode,a.houseNumber,a.streetName,a.fxGeocode,a.isActive} AS address,
        c1.name, c1.organizationRoleId,
        c1.externalIds, c1.roleSpecId, a.city AS city, a.zip AS zip
FROM optima_contact AS a
UNNEST (SELECT orgrole.name, orgrole.organizationRoleId,
                        orgrole.externalIds AS externalIds, orgrole.roleSpecId,
                        aa.addressId
                 FROM optima_contact AS c
                 UNNEST c.organizationRoles AS orgrole
                 UNNEST orgrole.addressAssociations AS aa WHERE c.type = "organization") AS c1
WHERE a.type = "address" AND TO_NUMBER(a.addressId) = c1.addressId;

OR

SELECT a AS address, c1.name, c1.organizationRoleId, c1.externalIds, c1.roleSpecId
FROM optima_contact AS a
UNNEST (SELECT orgrole.name, orgrole.organizationRoleId,
                        orgrole.externalIds AS externalIds, orgrole.roleSpecId,
                        orgrole.addressAssociations[*].addressId AS addresses
                 FROM optima_contact AS c
                 UNNEST c.organizationRoles AS orgrole
                 WHERE c.type = "organization") AS c1
WHERE a.type = "address" AND TO_NUMBER(a.addressId) IN c1.addresses;

Upvotes: 4

Related Questions