Reputation: 2797
I have a mongodb replica set with a lot of databases, collections & indexes.
We did a lot of refactor and optimization and, of course, I have a lot of "creative queries" from the consumers.
I would like to clean up the unused indexes. just wanna save some space.
How can I check if an index is being used? I can afford to check index by index and drop the unused ones.
Running an "explain" in all the possible queries is not an option :)
EDIT: SOLUTION BASED ON THE ACCEPTED ANSWER
The script was bugged. I am not a javascript expert, but I put the corrected script. I hope will be useful for someone:
DB.prototype.indexStats = function() {
var queries = [];
var collections = db.getCollectionNames();
var findQuery = function(q) {
for(entryIdx in queries) {
if(q == queries[entryIdx].query) {
return entryIdx;
}
}
return -1;
}
for(cIdx in collections) {
var cName = collections[cIdx];
var nsName = db.getName()+"."+cName;
if(cName.indexOf("system") == -1) {
var i = 1;
var count = db.system.profile.count({ns:nsName});
print('scanning profile {ns:"'+nsName+'"} with '+count+' records... this could take a while...');
db.system.profile.find({ns:nsName}).addOption(16).batchSize(10000).forEach(function(profileDoc) {
if(profileDoc.query && !profileDoc.query["$explain"]) {
var qIdx = findQuery(profileDoc.query);
if(qIdx == -1 && profileDoc.query["query"] ) {
var size = queries.push({query:profileDoc.query, count:1, index:""});
var explain = db[cName].find(queries[size-1].query).explain();
if(profileDoc.query && profileDoc.query["query"]) {
queries[size-1].sort = profileDoc.query["orderby"];
if(queries[size-1].sort) {
explain = db[cName].find(queries[size-1].query.query).sort(queries[size-1].sort).explain();
}
}
queries[size-1].cursor = explain.cursor;
queries[size-1].millis = explain.millis;
queries[size-1].nscanned = explain.nscanned;
queries[size-1].n = explain.n;
queries[size-1].scanAndOrder = explain.scanAndOrder ? true : false;
if(explain.cursor && explain.cursor != "BasicCursor") {
queries[size-1].index = explain.cursor.split(" ")[1];
} else {
print('warning, no index for query {ns:"'+nsName+'"}: ');
printjson(profileDoc.query);
print('... millis: ' + queries[size-1].millis);
print('... nscanned/n: ' + queries[size-1].nscanned + '/' + queries[size-1].n);
print('... scanAndOrder: ' + queries[size-1].scanAndOrder);
}
} else if ( qIdx != -1 ) {
queries[qIdx].count++;
}
}
});
}
}
for(cIdx in collections) {
var cName = collections[cIdx];
if(cName.indexOf("system") == -1) {
print('checking for unused indexes in: ' + cName);
for(iIdx in db[cName].getIndexes()) {
var iName = db[cName].getIndexes()[iIdx].name;
if(iName.indexOf("system") == -1) {
var stats = db[cName].stats();
var found = false;
for(qIdx in queries) {
if(queries[qIdx].index == iName) {
found = true;
break;
}
}
if(!found) {
print('this index is not being used: ');
printjson(iName);
}
}
}
}
}
}
Upvotes: 31
Views: 24840
Reputation: 1038
The simplest solution to this is to use the mongodb inbuilt $indexStats aggregation stage, added in MongoDB 3.2.
Using the Mongo console:
db.collection.aggregate([ { $indexStats: { } } ])
Using PyMongo:
from pymongo import MongoClient
collection = MongoClient()[db_name][collection_name]
index_stats = collection.aggregate([{'$indexStats':{}}])
for index_info in index_stats:
print index_info
Upvotes: 57
Reputation: 27517
There is a pretty cool script out on Github that you should look at:
https://github.com/wfreeman/indexalizer
Basically it involves turning on profiling for your database and then it will use the data collected by the profiler to drive explain() calls. It then tells you both which indexes are not being used and which queries are not using indexes. Pretty slick.
More about mongoDB database profiling:
http://docs.mongodb.org/manual/reference/database-profiler/
Upvotes: 14