Reputation: 1498
I have indexed a json file in Mongodb in collection "clicklog" using a shell command. Below is the result of my shell command:
db.clicklogs.find().pretty()
Output:
{
"_id" : ObjectId("58fe78dcfbe21fa7896552e8"),
"preview" : false,
"offset" : 0,
"result" : {
"search_term" : "484797",
"request_time" : "Sat Apr 01 23:58:49 -0400 2017",
"request_ip" : "127.0.0.1",
"stats_type" : "clickstats",
"upi" : "66024330304",
"unit" : "CITCS",
"job_title" : "IT Engineer",
"vpu" : "ICR",
"organization" : "73",
"location" : "MH",
"city" : "San Diego",
"country" : "USA",
"title" : "TOM",
"tab_name" : "People-Tab",
"page_name" : "PEOPLE",
"result_number" : "1",
"page_num" : "0",
"session_id" : "14e88b44576ad4fdc035bc41529762ad1",
"total_results" : "1",
"_raw":"request_time=Sat Apr 01 23:58:49 -0400 2017,request_ip=127.0.0.1,application=Search,stats_type=clickstats,upi=660243301304,unit=CITCS,job_title=IT Assistant, Client Services,vpu=ICR,location=DHAKA, BANGLADESH (IFC),organization=73,city=Dhaka,country=BANGLADESH,city_code=,search_term=484797,title= Tom,url=http://isearch.worldbank.org/skillfinder/ppl_profile_new/000484797,tab_name=People-Tab,page_name=PEOPLE,result_number=1,page_num=0,filter=qterm=484797,total_results=1,app_environment=production,log_version=1.0,session_id=4e88b44576ad4fdc035bc41529762ad1",
"_time":"2017-04-01T23:58:49.000-0400"
}
}
{"_id" : ObjectId("58fe78dcfbe21fa7896552e9"),
"preview" : false,
"offset" : 0,
"result" : {
"search_term" : "demo",
"request_time" : "Sat Apr 01 23:58:49 -0400 2017",
"request_ip" : "127.0.0.1",
....
"time":"2017-04-01T23:58:49.000-0400"
}
}
For every json document, I would like to get only the few field(id,searchterm,upi,page_name,sessionid, url(which is under _raw)). Is it possible to do it using mongo shell commands and store the result document in a new collection? Any help is appreciated.
Upvotes: 0
Views: 218
Reputation: 75984
You can try below aggregation in 3.4 version.
The query uses $split
operator couple of times to reach to url
value. Rest is standard projection fields.
$out
stage to write the results into new collection.
db.getCollection('clicklogs').aggregate([{
$project: {
searchterm: "$result.searchterm",
upi: "$result.upi",
page_name: "$result.page_name",
session_id: "$result.session_id",
url: {
$let: {
vars: {
obj: {
$arrayElemAt: [{
$split: ["$result._raw", ',']
}, 1]
}
},
in: {
$arrayElemAt: [{
$split: ["$$obj", '=']
}, 1]
}
}
}
}
},
{
$out: "clicklogs_temp"
}
])
Upvotes: 1