Reputation: 10866
I need to normalize a list of elements before applying Enum.chunk_by
to the list.
Typically when we enumerate we go item by item, for example
source_list |> normalize_item
How can we enumerate a list while referring to the previous element?
Also, how can we carry the last normalized item over to the next iteration?
Sample Data:
[
%PhoneBills.Text{end: 91, page: 13, row: 237, start: 48, text: "2348035250601"},
%PhoneBills.Text{end: 155, page: 13, row: 237, start: 99, text: "17-12-2014 10:08:32"},
%PhoneBills.Text{end: 247, page: 13, row: 238, start: 168, text: "1080643204171320 2811630"},
%PhoneBills.Text{end: 286, page: 13, row: 238, start: 268, text: "400.00"},
%PhoneBills.Text{end: 394, page: 13, row: 238, start: 370, text: "Payment"},
%PhoneBills.Text{end: 91, page: 13, row: 244, start: 48, text: "2348035250601"},
%PhoneBills.Text{end: 155, page: 13, row: 244, start: 99, text: "17-12-2014 10:09:12"},
%PhoneBills.Text{end: 247, page: 13, row: 246, start: 168, text: "1775292204171752 2811630"},
%PhoneBills.Text{end: 286, page: 13, row: 246, start: 268, text: "400.00"},
%PhoneBills.Text{end: 394, page: 13, row: 246, start: 370, text: "Payment"},
%PhoneBills.Text{end: 91, page: 13, row: 252, start: 48, text: "2348068014410"},
%PhoneBills.Text{end: 155, page: 13, row: 252, start: 99, text: "17-12-2014 08:34:23"},
%PhoneBills.Text{end: 250, page: 13, row: 254, start: 168, text: "174729900817063 100153465"},
%PhoneBills.Text{end: 286, page: 13, row: 254, start: 263, text: "1,500.00"},
%PhoneBills.Text{end: 394, page: 13, row: 254, start: 370, text: "Payment"}
]
I need to normalize the row fields above before applying Enum.chunk_by to the list: two adjacent elements are actually on the same row if their row values differ by at most 2, in which case the later element should take the row value of the earlier one.
I have considered Enum.chunk_while
but it does not offer a good solution in this case.
Expected output:
[
%PhoneBills.Text{end: 91, page: 13, row: 237, start: 48, text: "2348035250601"},
%PhoneBills.Text{end: 155, page: 13, row: 237, start: 99, text: "17-12-2014 10:08:32"},
%PhoneBills.Text{end: 247, page: 13, row: 237, start: 168, text: "1080643204171320 2811630"},
%PhoneBills.Text{end: 286, page: 13, row: 237, start: 268, text: "400.00"},
%PhoneBills.Text{end: 394, page: 13, row: 237, start: 370, text: "Payment"},
%PhoneBills.Text{end: 91, page: 13, row: 244, start: 48, text: "2348035250601"},
%PhoneBills.Text{end: 155, page: 13, row: 244, start: 99, text: "17-12-2014 10:09:12"},
%PhoneBills.Text{end: 247, page: 13, row: 244, start: 168, text: "1775292204171752 2811630"},
%PhoneBills.Text{end: 286, page: 13, row: 244, start: 268, text: "400.00"},
%PhoneBills.Text{end: 394, page: 13, row: 244, start: 370, text: "Payment"},
%PhoneBills.Text{end: 91, page: 13, row: 252, start: 48, text: "2348068014410"},
%PhoneBills.Text{end: 155, page: 13, row: 252, start: 99, text: "17-12-2014 08:34:23"},
%PhoneBills.Text{end: 250, page: 13, row: 252, start: 168, text: "174729900817063 100153465"},
%PhoneBills.Text{end: 286, page: 13, row: 252, start: 263, text: "1,500.00"},
%PhoneBills.Text{end: 394, page: 13, row: 252, start: 370, text: "Payment"}
]
Upvotes: 1
Views: 1326
Reputation: 121000
The easiest, most straightforward approach would be to use Enum.reduce/3. Unfortunately, I was unable to work out from the requirements exactly how you want to change row, but here is a generic example:
# Generic fold that remembers the previously visited element: the accumulator
# keeps that element under :last and every element seen so far, in input
# order, under :values.
Enum.reduce(source_list, %{last: nil, values: []}, fn item, %{values: seen} ->
  %{last: item, values: seen ++ [item]}
end)
Enum.chunk_while/4 is also a good candidate for the job. If you could provide the expected output, I could come up with an example that uses chunk_while.
defmodule PhoneBills.Text do
  # Struct matching the sample data: five fields, each defaulting to nil.
  defstruct [:end, :page, :row, :start, :text]
end
input = [...]
# Rewrites the :row of every element that lies within 2 rows of the row that
# anchors the current group, so that all elements of one visual row end up
# with the same :row value.
#
# Returns %{row: last_anchor_row, acc: normalized_elements}, with the elements
# in input order. For an empty input the seed %{row: 0, acc: []} is returned.
input
|> Enum.reduce(%{row: 0, acc: []}, fn
  # The first element always anchors the first group. Matching on the empty
  # list (rather than comparing against the seed row 0) keeps an element whose
  # row is 0, 1 or 2 from being rewritten to the seed value.
  e, %{acc: []} ->
    %{row: e.row, acc: [e]}

  e, %{row: anchor, acc: acc} ->
    # abs/1 so that a row number that jumps backwards starts a new group
    # instead of being merged into the current one.
    row = if abs(e.row - anchor) <= 2, do: anchor, else: e.row
    # Prepend (O(1)) and reverse once at the end instead of appending with ++.
    %{row: row, acc: [%PhoneBills.Text{e | row: row} | acc]}
end)
|> Map.update!(:acc, &Enum.reverse/1)
The solution with Enum.chunk_while/4
:
# Chunks the input into visual rows and normalizes :row in the same pass.
# The accumulator is %{row: anchor_row, acc: elements_of_current_chunk}, where
# the elements are kept in reverse order and reversed when a chunk is emitted.
chunk_fun = fn
  # init: the very first element anchors the first chunk. The clause matches on
  # the empty list instead of `row: 0`; after this step :acc is never empty
  # again, so a chunk anchored on row 0 can no longer re-enter this clause and
  # lose the elements collected so far.
  e, %{acc: []} ->
    {:cont, %{row: e.row, acc: [e]}}

  # emit: the element is more than 2 rows away from the anchor (in either
  # direction), so the current chunk is complete and the element starts a new one.
  %{row: e_row} = e, %{row: row, acc: acc} when abs(e_row - row) > 2 ->
    {:cont, Enum.reverse(acc), %{row: e_row, acc: [e]}}

  # continue collecting: the element belongs to the current chunk and takes
  # the anchor's row.
  e, %{row: row, acc: acc} ->
    {:cont, %{row: row, acc: [%PhoneBills.Text{e | row: row} | acc]}}
end

# Flushes the last, still open chunk once the input is exhausted.
after_fun = fn
  %{acc: []} -> {:cont, []}
  %{acc: acc} -> {:cont, Enum.reverse(acc), []}
end

Enum.chunk_while(input, %{row: 0, acc: []}, chunk_fun, after_fun)
That way the output is already both adjusted and chunked.
EDIT: Solution accounting for page numbers: actual test data
# Page-aware variant: chunks the input into visual rows and normalizes :row.
# A chunk never spans two pages. The accumulator is
# %{page: page, row: anchor_row, acc: elements_of_current_chunk}, where the
# elements are kept in reverse order and reversed when a chunk is emitted.
chunk_fun = fn
  # init: the very first element anchors the first chunk. The clause matches on
  # the empty list instead of `page: 0, row: 0`; after this step :acc is never
  # empty again, so a chunk anchored on page 0 / row 0 can no longer re-enter
  # this clause and lose the elements collected so far.
  e, %{acc: []} ->
    {:cont, %{page: e.page, row: e.row, acc: [e]}}

  # emit: the element is on a different page, or on the same page but more
  # than 2 rows away from the anchor (in either direction).
  %{row: e_row, page: e_page} = e, %{page: page, row: row, acc: acc}
  when e_page != page or abs(e_row - row) > 2 ->
    {:cont, Enum.reverse(acc), %{page: e_page, row: e_row, acc: [e]}}

  # continue collecting: same page and close enough to the anchor, so only
  # :row needs rewriting. The struct is referenced by its full name because
  # no alias for `Text` is in scope here.
  e, %{page: page, row: row, acc: acc} ->
    {:cont, %{page: page, row: row, acc: [%PhoneBills.Text{e | row: row} | acc]}}
end

# Flushes the last, still open chunk once the input is exhausted.
after_fun = fn
  %{acc: []} -> {:cont, []}
  %{acc: acc} -> {:cont, Enum.reverse(acc), []}
end

Enum.chunk_while(input, %{row: 0, page: 0, acc: []}, chunk_fun, after_fun)
Upvotes: 3