Reputation: 1380
I have an array of keys in a CouchDB view, [doc.time, doc.address]
. Neither is unique. doc.time
is a UNIX timestamp and doc.address
is a string. The reduce
function is set to _sum
as the only value for each set of keys is a number.
What I want is to filter by doc.time
, then group the remaining records by doc.address
. If I put doc.time
as the first key, I cannot seem to group by unique addresses no matter what I specify as a group_level
. If I put doc.address
first, I cannot seem to filter the query by time.
Query: ?group_level=1&startkey=[0,1230000000]&endkey=[{},1340000000]
First Key: doc.address
before doc.time
Problem: Does not filter by time
Code:
rows: [
{
key: [ "1126GDuGLQTX3LFHHmjCctdn8WKDjn7QNA" ],
value: 50
},
{
key: [ "112AobLhjLJQ3LGqXFrsdnWMPqWCQqoiS6" ],
value: 50
}
]
Query: ?group_level=1&startkey=[1230000000]&endkey=[1340000000,{}]
First Key: doc.time
before doc.address
Problem: Cannot see and I am not grouped by doc.address
Code:
rows: [
{
key: [ 1231469665 ],
value: 50
},
{
key: [ 1231469744 ],
value: 50
}
]
Upvotes: 0
Views: 3075
Reputation: 7952
You mentioned that:
... If I put
doc.time
as the first key, I cannot seem to group by unique addresses no matter what I specify as a group_level ...
The query parameter group_level=N
splits the string on the Nth
comma and groups the left elements together by string match. Therefore, When your array key is like this: [doc.time, doc.address]
, you won't be able to group by address
, which is not on the left side of the comma.
... If I put
doc.address
first, I cannot seem to filter the query by time ...
When your array key is like: [doc.address, doc.time]
, notice that you are emitting an array key inside your Map function. You need to consider the following points regarding array key or compound key in CouchDB:
Described on this reference:
... First thing of note and very important ... an array output ... from the javascript Map function ... each of those Index Keys are strings, and are ordered character by character as strings, including the brackets and commas ...
The above statement and explanations on the reference have a significant impact on how CouchDB indexing works in the case of compound key or array key.
To clarify, lets create documents like below on a sample
database:
{"time":"2011","address":"CT"}
{"time":"2012","address":"CT"}
...
{"time":"2011","address":"TX"}
...
{"time":"2015","address":"TX"}
...
{"time":"2014","address":"NY"}
...
{"time":"2014","address":"CA"}
{"time":"2015","address":"CA"}
{"time":"2016","address":"CA"}
I implemented a view map function like this:
function (doc) {
if(doc.time && doc.address){
emit([doc.address, doc.time], null);
}
}
For now, I'm not using any Reduce function, because, lets ignore any grouping or reducing and focus on plain simple indexing. The above view is generating the following key/value pairs for indexing:
$ curl -k -X GET 'https://admin:****@192.168.1.106:6984/sample/_design/by_addr_time/_view/by_addr_time'
{"total_rows":25,"offset":0,"rows":[
{"id":"doc_0022","key":["CA","2014"],"value":null},
{"id":"doc_0023","key":["CA","2015"],"value":null},
{"id":"doc_0024","key":["CA","2016"],"value":null},
{"id":"doc_0000","key":["CT","2011"],"value":null},
{"id":"doc_0001","key":["CT","2012"],"value":null},
{"id":"doc_0002","key":["CT","2013"],"value":null},
{"id":"doc_0003","key":["CT","2014"],"value":null},
{"id":"doc_0004","key":["CT","2015"],"value":null},
{"id":"doc_0005","key":["CT","2016"],"value":null},
{"id":"doc_0014","key":["NY","2011"],"value":null},
{"id":"doc_0015","key":["NY","2012"],"value":null},
{"id":"doc_0016","key":["NY","2013"],"value":null},
{"id":"doc_0017","key":["NY","2014"],"value":null},
{"id":"doc_0018","key":["NY","2015"],"value":null},
{"id":"doc_0019","key":["NY","2016"],"value":null},
{"id":"doc_0020","key":["NY","2017"],"value":null},
{"id":"doc_0021","key":["NY","2018"],"value":null},
{"id":"doc_0006","key":["TX","2011"],"value":null},
{"id":"doc_0008","key":["TX","2012"],"value":null},
{"id":"doc_0007","key":["TX","2013"],"value":null},
{"id":"doc_0009","key":["TX","2014"],"value":null},
{"id":"doc_0010","key":["TX","2015"],"value":null},
{"id":"doc_0011","key":["TX","2016"],"value":null},
{"id":"doc_0012","key":["TX","2017"],"value":null},
{"id":"doc_0013","key":["TX","2018"],"value":null}
]}
Now, I'm going to do a query to filter the view by doc.time
. My query parameters are:
?startkey=["AA","2017"]&endkey=["ZZ","2018"]
I expect the above query to return only the docs with the time
field between 2017
and 2018
, the address
field of those docs can have any value since I specified from AA
to ZZ
which includes all addresses on my database. I'm doing the query with curl
like this:
$ curl -k -X GET 'https://admin:****@192.168.1.106:6984/sample/_design/by_addr_time/_view/by_addr_time?startkey=\["AA","2017"\]&endkey=\["ZZ","2018"\]'
{"total_rows":25,"offset":0,"rows":[
{"id":"doc_0022","key":["CA","2014"],"value":null},
{"id":"doc_0023","key":["CA","2015"],"value":null},
{"id":"doc_0024","key":["CA","2016"],"value":null},
{"id":"doc_0000","key":["CT","2011"],"value":null},
{"id":"doc_0001","key":["CT","2012"],"value":null},
{"id":"doc_0002","key":["CT","2013"],"value":null},
{"id":"doc_0003","key":["CT","2014"],"value":null},
{"id":"doc_0004","key":["CT","2015"],"value":null},
{"id":"doc_0005","key":["CT","2016"],"value":null},
{"id":"doc_0014","key":["NY","2011"],"value":null},
{"id":"doc_0015","key":["NY","2012"],"value":null},
{"id":"doc_0016","key":["NY","2013"],"value":null},
{"id":"doc_0017","key":["NY","2014"],"value":null},
{"id":"doc_0018","key":["NY","2015"],"value":null},
{"id":"doc_0019","key":["NY","2016"],"value":null},
{"id":"doc_0020","key":["NY","2017"],"value":null},
{"id":"doc_0021","key":["NY","2018"],"value":null},
{"id":"doc_0006","key":["TX","2011"],"value":null},
{"id":"doc_0008","key":["TX","2012"],"value":null},
{"id":"doc_0007","key":["TX","2013"],"value":null},
{"id":"doc_0009","key":["TX","2014"],"value":null},
{"id":"doc_0010","key":["TX","2015"],"value":null},
{"id":"doc_0011","key":["TX","2016"],"value":null},
{"id":"doc_0012","key":["TX","2017"],"value":null},
{"id":"doc_0013","key":["TX","2018"],"value":null}
]}
The response returned by the above query seems shocking. Because it looks like it did NOT return only the docs with time
filed between 2017
and 2018
. That's just how the CouchDB indexing for array keys work. CouchDB does the indexing of array keys as if the whole array is a string including the brackets and commas of the array! If you read the reference, it would start to make sense.
Now lets change the query:
?startkey=["CT","2016"]&endkey=["TX","2011"]
The result of the above query is shown below, based on our explanations, this should make sense:
$ curl -k -X GET 'https://admin:****@192.168.1.106:6984/sample/_design/by_addr_time/_view/by_addr_time?startkey=\["CT","2016"\]&endkey=\["TX","2011"\]'
{"total_rows":25,"offset":8,"rows":[
{"id":"doc_0005","key":["CT","2016"],"value":null},
{"id":"doc_0014","key":["NY","2011"],"value":null},
{"id":"doc_0015","key":["NY","2012"],"value":null},
{"id":"doc_0016","key":["NY","2013"],"value":null},
{"id":"doc_0017","key":["NY","2014"],"value":null},
{"id":"doc_0018","key":["NY","2015"],"value":null},
{"id":"doc_0019","key":["NY","2016"],"value":null},
{"id":"doc_0020","key":["NY","2017"],"value":null},
{"id":"doc_0021","key":["NY","2018"],"value":null},
{"id":"doc_0006","key":["TX","2011"],"value":null}
]}
... What I want is to filter by
doc.time
, then group the remaining records bydoc.address
...
So, what should we do? There is a good question and answer and provides the basic ideas.
I'm not sure which idea is the best, but I implemented one idea like this: created a view named t_red
like below with a builtin _count
reduce:
function (doc) {
if(doc.time && doc.address){
emit([doc.time, doc.address], null);
}
}
Also, I created a view named a_red
with a builtin _count
reduce:
function (doc) {
if(doc.address && doc.time){
emit([doc.address, doc.time], null);
}
}
Then I developed the following code on NodeJS to query doc.time
between 2012
and 2015
and then group the results according to the doc.address
, console logs are shown inside the code as comments. I hope this code will be helpful (not confusing!):
process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0"; // Ignore rejection, becasue CouchDB SSL certificate is self-signed
const fetch=require('node-fetch')
// query "t_red" view/index
fetch(`https://admin:****@192.168.1.106:6984/sample/_design/t_red/_view/t_red?group_level=2&startkey=["2012", "AA"]&endkey=["2015", "ZZ"]`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
}
}).then(
res=>res.json()
).then(data=>{
let unique_addr=[]
data.rows.map(row=>{
console.log('row.key-> ', row.key, ' row.value-> ', row.value)
// console log is shown below:
//
// row.key-> [ '2012', 'CT' ] row.value-> 1
// row.key-> [ '2012', 'NY' ] row.value-> 1
// row.key-> [ '2012', 'TX' ] row.value-> 1
// row.key-> [ '2013', 'CT' ] row.value-> 1
// row.key-> [ '2013', 'NY' ] row.value-> 1
// row.key-> [ '2013', 'TX' ] row.value-> 1
// row.key-> [ '2014', 'CA' ] row.value-> 1
// row.key-> [ '2014', 'CT' ] row.value-> 1
// row.key-> [ '2014', 'NY' ] row.value-> 1
// row.key-> [ '2014', 'TX' ] row.value-> 1
// row.key-> [ '2015', 'CA' ] row.value-> 1
// row.key-> [ '2015', 'CT' ] row.value-> 1
// row.key-> [ '2015', 'NY' ] row.value-> 1
// row.key-> [ '2015', 'TX' ] row.value-> 1
if(unique_addr.indexOf(row.key[1])==-1){ // Push unique addresses into an array
unique_addr.push(row.key[1])
}
})
console.log(unique_addr)
// Console log is shown below:
//
// [ 'CT', 'NY', 'TX', 'CA' ]
return unique_addr
}).then(unique_addr=>{
// Group the unique addresses
let group_by_address=unique_addr.map(addr=>{
// For each unique address, do a query of "a_red" view/index
return fetch(`https://admin:****@192.168.1.106:6984/sample/_design/a_red/_view/a_red?group_level=2&startkey=["${addr}","2012"]&endkey=["${addr}","2015"]`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
}
}).then(
res=>res.json()
).then(data=>{
data.rows.map(row=>{console.log('row.key-> ', row.key, ' row.value-> ', row.value)})
// Console logs related to this section of code are shown below
//row.key-> [ 'CA', '2014' ] row.value-> 1
//row.key-> [ 'CA', '2015' ] row.value-> 1
//row.key-> [ 'NY', '2012' ] row.value-> 1
//row.key-> [ 'NY', '2013' ] row.value-> 1
//row.key-> [ 'NY', '2014' ] row.value-> 1
//row.key-> [ 'NY', '2015' ] row.value-> 1
//row.key-> [ 'CT', '2012' ] row.value-> 1
//row.key-> [ 'CT', '2013' ] row.value-> 1
//row.key-> [ 'CT', '2014' ] row.value-> 1
//row.key-> [ 'CT', '2015' ] row.value-> 1
//row.key-> [ 'TX', '2012' ] row.value-> 1
//row.key-> [ 'TX', '2013' ] row.value-> 1
//row.key-> [ 'TX', '2014' ] row.value-> 1
//row.key-> [ 'TX', '2015' ] row.value-> 1
let obj={}
obj[addr]=data.rows.length // This object contains unique address and its corresponding frequency in above query
return obj
}).catch(err=>{
console.log('err-> ', err)
})
})
return group_by_address
}).then(group_by_address=>{
group_by_address.map(group=>{
group.then(()=>{
console.log('Grouped by address-> ', group)
// Console logs related this section of code are shown below:
//Grouped by address-> Promise { { CA: 2 } }
//Grouped by address-> Promise { { NY: 4 } }
//Grouped by address-> Promise { { CT: 4 } }
//Grouped by address-> Promise { { TX: 4 } }
})
})
}).catch(err=>{
console.log('err-> ', err)
})
Upvotes: 3