pythonjokeun
pythonjokeun

Reputation: 431

What is the Clojure way to transform following data?

So I've just played with Clojure today.

Using this data,

;; Sample input: two rows per :id, one for each status ("COMPLETED"/"CREATED"),
;; each carrying a :p and an :i value.
(def test-data
  [{:id 35462, :status "COMPLETED", :p 2640000, :i 261600}
   {:id 35462, :status "CREATED", :p 240000, :i 3200}
   {:id 57217, :status "COMPLETED", :p 470001, :i 48043}
   {:id 57217, :status "CREATED", :p 1409999, :i 120105}])

Then transform the above data with,

;; Groups rows by :id, then for each group extracts :p and :i from the
;; COMPLETED row (as :a) and the CREATED row (as :b).
;; Note: each status is filtered twice per group (once for :p, once for :i),
;; and (into {} ...) over a seq of row maps merges them last-one-wins.
(as-> test-data input
      (group-by :id input)
      (map (fn [x] {:id (key x)
                    :p  {:a (as-> (filter #(= (:status %) "COMPLETED") (val x)) tmp
                                  (into {} tmp)
                                  (get tmp :p))
                         :b (as-> (filter #(= (:status %) "CREATED") (val x)) tmp
                                  (into {} tmp)
                                  (get tmp :p))}
                    :i  {:a (as-> (filter #(= (:status %) "COMPLETED") (val x)) tmp
                                  (into {} tmp)
                                  (get tmp :i))
                         :b (as-> (filter #(= (:status %) "CREATED") (val x)) tmp
                                  (into {} tmp)
                                  (get tmp :i))}})
           input)
      (into [] input))

To produce,

[{:id 35462, :p {:a 2640000, :b 240000}, :i {:a 261600, :b 3200}}
 {:id 57217, :p {:a 470001, :b 1409999}, :i {:a 48043, :b 120105}}]

But I have a feeling that my code is not the "Clojure way". So my question is, what is the "Clojure way" to achieve what I've produced?

Upvotes: 1

Views: 142

Answers (4)

Alan Thompson
Alan Thompson

Reputation: 29984

I would approach it more like the following, so it can handle any number of entries for each :id value. Of course, many variations are possible.

;; NOTE: relies on the third-party tupelo library (t/forv, dotest, is=),
;; not plain clojure.test.
(ns tst.demo.core
  (:use demo.core tupelo.core tupelo.test)
  (:require
    [tupelo.core :as t] ))

(dotest
  (let [test-data [{:id 35462, :status "COMPLETED", :p 2640000, :i 261600}
                   {:id 35462, :status "CREATED", :p 240000, :i 3200}
                   {:id 57217, :status "COMPLETED", :p 470001, :i 48043}
                   {:id 57217, :status "CREATED", :p 1409999, :i 120105}]
        ;; d1: rows grouped by :id; d2: one summary map per id, collecting
        ;; every row's :status, :p and :i into parallel vectors.
        d1        (group-by :id test-data)
        d2        (t/forv [[id entries] d1]
                    {:id         id
                     :status-all (mapv :status entries)
                     :p-all      (mapv :p entries)
                     :i-all      (mapv :i entries)})]
    (is= d1
      {35462
       [{:id 35462, :status "COMPLETED", :p 2640000, :i 261600}
        {:id 35462, :status "CREATED", :p 240000, :i 3200}],
       57217
       [{:id 57217, :status "COMPLETED", :p 470001, :i 48043}
        {:id 57217, :status "CREATED", :p 1409999, :i 120105}]})

    (is= d2 [{:id         35462,
              :status-all ["COMPLETED" "CREATED"],
              :p-all      [2640000 240000],
              :i-all      [261600 3200]}
             {:id         57217,
              :status-all ["COMPLETED" "CREATED"],
              :p-all      [470001 1409999],
              :i-all      [48043 120105]}])
    ))

Upvotes: 1

rmcv
rmcv

Reputation: 1976

There is no one "Clojure way" (I guess you mean functional way), as it depends on how you decompose the problem.

Here is the way I will do:

;; Pipeline: index each row by [id status], nest, merge by id, then shape
;; each [id m] entry into the target map via ->ab-map (helpers defined below).
(->> test-data
     (map (juxt :id :status identity)) ; -> [id status row] triples
     (map ->nested)                    ; -> {id {status row}} per triple
     (apply deep-merge)                ; merge all triples into one nested map
     (map (fn [[id m]]
            {:id id
             :p  (->ab-map m :p)
             :i  (->ab-map m :i)})))

;; ({:id 35462, :p {:a 2640000, :b 240000}, :i {:a 261600, :b 3200}}
;;  {:id 57217, :p {:a 470001, :b 1409999}, :i {:a 48043, :b 120105}})

As you can see, I used a few functions and here is the step-by-step explanation:

  1. Extract index keys (id + status) and the map itself into vector
(map (juxt :id :status identity) test-data)
;; ([35462 "COMPLETED" {:id 35462, :status "COMPLETED", :p 2640000, :i 261600}]
;;  [35462 "CREATED" {:id 35462, :status "CREATED", :p 240000, :i 3200}]
;;  [57217 "COMPLETED" {:id 57217, :status "COMPLETED", :p 470001, :i 48043}]
;;  [57217 "CREATED" {:id 57217, :status "CREATED", :p 1409999, :i 120105}])
  2. Transform into a nested map (id, then status)
(map ->nested *1)
;; ({35462 {"COMPLETED" {:id 35462, :status "COMPLETED", :p 2640000, :i 261600}}}
;;  {35462 {"CREATED" {:id 35462, :status "CREATED", :p 240000, :i 3200}}}
;;  {57217 {"COMPLETED" {:id 57217, :status "COMPLETED", :p 470001, :i 48043}}}
;;  {57217 {"CREATED" {:id 57217, :status "CREATED", :p 1409999, :i 120105}}})
  3. Merge the nested maps by id
(apply deep-merge *1)
;; {35462
;;  {"COMPLETED" {:id 35462, :status "COMPLETED", :p 2640000, :i 261600},
;;   "CREATED" {:id 35462, :status "CREATED", :p 240000, :i 3200}},
;;  57217
;;  {"COMPLETED" {:id 57217, :status "COMPLETED", :p 470001, :i 48043},
;;   "CREATED" {:id 57217, :status "CREATED", :p 1409999, :i 120105}}}
  4. For attributes :p and :i, map to :a and :b according to status
(->ab-map {"COMPLETED" {:id 35462, :status "COMPLETED", :p 2640000, :i 261600},
           "CREATED" {:id 35462, :status "CREATED", :p 240000, :i 3200}}
          :p)
;; => {:a 2640000, :b 240000}

And below are the few helper functions I used:

(defn ->ab-map
  "Given a map m nested by status string, pull attribute k out of the
  COMPLETED entry as :a and the CREATED entry as :b."
  [m k]
  {:a (get-in m ["COMPLETED" k])
   :b (get-in m ["CREATED" k])})

(defn ->nested
  "Turn a sequence [k1 k2 ... kn v] into the nested map {k1 {k2 ... {kn v}}}.
  The last element becomes the innermost value."
  [kvs]
  (let [[k & more] kvs]
    (if (next more)
      {k (->nested more)}
      {k (first more)})))

(defn deep-merge
  "Merge maps recursively: nested maps are merged level by level,
  any non-map values are overwritten left to right."
  [& vals]
  (if (not-every? map? vals)
    (apply merge vals)
    (apply merge-with deep-merge vals)))

Upvotes: 1

pete23
pete23

Reputation: 2280

It's a pretty odd transformation, keys seem a little arbitrary and it's hard to generalise from n=2 (or indeed to know whether n ever > 2).

I'd use functional decomposition to factor out some of the commonality and get some traction. First of all let us transform the statuses into our keys...

(def status->ab {"COMPLETED" :a "CREATED" :b})

Then, with that in hand, I'd like an easy way of getting the "meat" out of the substructure. Here, for a given key into the data, I'm providing the content of the enclosing map for that key and a given group result.

(defn subgroup->subresult
  "For each row in subgroup, emit [output-key value-of-k] (output key taken
  from status->ab) and build an array-map from the flattened pairs."
  [k subgroup]
  (->> subgroup
       (mapcat (fn [row] [(status->ab (:status row)) (k row)]))
       (apply array-map)))

With this, the main transformer becomes much more tractable:

(defn group->result
  "Shape one [id rows] entry from (group-by :id ...) into the final
  {:id ... :p {...} :i {...}} map."
  [group]
  (let [[id rows] group]
    {:id id
     :p  (subgroup->subresult :p rows)
     :i  (subgroup->subresult :i rows)}))

I wouldn't consider generalising across :p and :i for this - if you had more than two keys, then maybe I would generate a map of k -> the subgroup result and do some sort of reducing merge. Anyway, we have an answer:

(map group->result (group-by :id test-data))
;; =>
({:id 35462, :p {:b 240000, :a 2640000}, :i {:b 3200, :a 261600}}
 {:id 57217, :p {:b 1409999, :a 470001}, :i {:b 120105, :a 48043}})

Upvotes: 2

Taylor Wood
Taylor Wood

Reputation: 16194

The only things that stand out to me are using as-> when ->> would work just as well, and some work being done redundantly, and some destructuring opportunities:

(defn aggregate
  "Collapse one [id rows] group into {:id ... :p {:a ... :b ...} :i {:a ... :b ...}},
  where :a comes from the COMPLETED row and :b from the CREATED row.
  (into {} ...) over the filtered row maps is last-one-wins, as in the original."
  [[id values]]
  (let [rows-with (fn [status]
                    (into {} (filter #(= (:status %) status) values)))
        completed (rows-with "COMPLETED")
        created   (rows-with "CREATED")]
    {:id id
     :p  {:a (:p completed), :b (:p created)}
     :i  {:a (:i completed), :b (:i created)}}))

(->> test-data
     (group-by :id)
     (map aggregate))
=>
({:id 35462, :p {:a 2640000, :b 240000}, :i {:a 261600, :b 3200}}
 {:id 57217, :p {:a 470001, :b 1409999}, :i {:a 48043, :b 120105}})

However, pouring those filtered values (which are maps themselves) into a map seems suspect to me. This is creating a last-one-wins scenario where the order of your test data affects the output. Try this to see how different orders of test-data affect output:

(into {} (filter #(= (:status %) "COMPLETED") (shuffle test-data)))

Upvotes: 3

Related Questions