Reputation: 8460
The function "good-red" used to calculate the 18 highest frequency numbers from a file ssqHitNum.txt.
(defun good-red ()
(let ((tab (make-hash-table)) (res '()) (nums) (sort-res))
(dotimes (i 33) (setf (gethash (+ i 1) tab) 0))
(with-open-file (stream "ssqHitNum.txt")
(loop :for line = (read-line stream nil)
:until (null line)
:do
(setq nums (butlast (str2lst (subseq line 6))))
(dolist (n nums) (incf (gethash n tab)))
))
(maphash #'(lambda (k v) (push (cons k v) res)) tab)
(setq sort-res (sort res #'> :key #'cdr))
;(print sort-res)
(subseq (mapcar #'car sort-res) 0 18)))
$head ssqHitNum.txt
10000 7 12 18 19 22 28 4
10000 16 17 23 26 31 32 11
10000 3 4 18 22 24 29 11
10000 4 9 10 18 29 32 8
10000 5 7 10 14 17 25 11
The number is between 1 and 33. Whether or not i use a hashtab
and scan the file line by line as the Common Lisp code does in ocaml ? Or there is more elegant way using ocaml ?
Any suggestion is appreciated !
Upvotes: 2
Views: 969
Reputation: 48687
FWIW, in F# 3.0 you can just write it like this:
System.IO.File.ReadLines @"ssqHitNum.txt"
|> Seq.collect (fun s -> s.Split ' ' |> Seq.skip 1)
|> Seq.countBy id
|> Seq.sortBy (fun (_, p) -> -p)
|> Seq.take 18
In OCaml, I'd start by writing these useful library functions from scratch:
let readAllLines file =
let lines = ref [] in
let input = open_in file in
begin
try
while true do
lines := input_line input :: !lines
done
with
| End_of_file ->
close_in input
end;
List.rev !lines
let collect f xs =
List.concat (List.map f xs)
let countBy f xs =
let counts = Hashtbl.create 100 in
let find key = try Hashtbl.find counts (f key) with Not_found -> 0 in
let add key = Hashtbl.replace counts (f key) (1 + find key) in
List.iter add xs;
Hashtbl.fold (fun k n kns -> (k, n)::kns) table []
let sortBy f xs =
List.sort (fun x y -> compare (f x) (f y)) xs
let rec truncate n xs =
match n, xs with
| 0, _ | _, [] -> []
| n, x::xs -> x::truncate (n-1) xs
let rec skip n xs =
match n, xs with
| 0, xs -> xs
| n, [] -> []
| n, _::xs -> skip (n-1) xs
let (|>) x f = f x
let id x = x
and then write it the same way:
readAllLines "ssqHitNum.txt"
|> collect (fun s -> split ' ' s |> skip 1)
|> countBy id
|> sortBy (fun (_, p) -> -p)
|> truncate 18
The F# is still better because the lines are read on-demand whereas my OCaml reads everything into memory up-front.
Upvotes: 13
Reputation: 166
With a fixed set of small integers it may be simpler to use an array:
let good_red () =
let a = Array.make 33 0 in
let bump i = a.(i-1) <- a.(i-1) + 1 in
let rec iter_lines fh =
try
let words = Str.split (Str.regexp " +") (input_line fh) in
List.iter bump (List.map int_of_string (List.tl words));
iter_lines fh
with End_of_file -> () in
let fh = open_in "ssqHitNum.txt" in
iter_lines fh;
close_in fh;
let b = Array.mapi (fun i freq -> (i+1,freq)) a in
Array.sort (fun (i1,f1) (i2,f2) -> compare f2 f1) b;
Array.sub b 0 18;;
try
Array.iter (fun (i,freq) -> Printf.printf "%2d %2d\n" freq i) (good_red ())
with Invalid_argument _ -> print_endline "bad input"
As gasche mentions you need to compile with str.cma or str.cmxa.
Upvotes: 2
Reputation: 31459
As asked by z_axis, here is another solution using only the base library distributed with the OCaml compiler. It is a bit more verbose due to the absence of some convenience functions.
let freq file best_n =
let table = Hashtbl.create 100 in
let freq_num num =
Hashtbl.replace table num
(1 + try Hashtbl.find table num with Not_found -> 0) in
begin
let input = open_in file in
try while true do
let line = input_line input in
let nums = List.tl (Str.split (Str.regexp " +") line) in
List.iter freq_num nums
done with End_of_file -> close_in input
end;
let sorted =
let cmp (_,freq1) (_,freq2) = (* decreasing *) compare freq2 freq1 in
List.sort cmp (Hashtbl.fold (fun k x li -> (k,x)::li) table []) in
(* take not tail-rec, not a problem for small n such as n=18 *)
let rec take n = function
| li when n = 0 -> []
| [] -> []
| hd::tl -> hd :: take (n - 1) tl in
take best_n sorted
The regexp module Str is not linked by default, even if it is in the default search path, so you must explicitly compile the program with str.cma
(for ocamlc
) or str.cmxa
(for ocamlopt
). In the toplevel, #use "topfind";;
then #require "str";;
will do.
Upvotes: 3
Reputation: 31459
From a similarly high-level point of view, using Batteries:
open Batteries_uni
let freq file best_n =
let table = Hashtbl.create 100 in
let freq_num num =
Hashtbl.replace table num (1 + Hashtbl.find_default table num 0) in
let freq_line line =
let nums = List.tl (String.nsplit line " ") in
List.iter freq_num nums in
Enum.iter freq_line (File.lines_of file);
let cmp (_,freq1) (_,freq2) = (* decreasing *) compare freq2 freq1 in
Hashtbl.enum table |> List.of_enum
|> List.sort ~cmp
|> List.take best_n
To test, from the toplevel:
#use "topfind";;
#require "batteries";;
#use "/tmp/test.ml";;
test "/tmp/test.txt" 18;;
Upvotes: 4
Reputation: 1
I'm not sure to understand the problem you want to solve. (Why all your input lines start with 10000
?)
If you just want to find the 18-th highest frequency numbers, you don't need to read line by line (and this is true in Lisp, in C, in Ocaml, ...), and Ocaml's Scanf.scanf "%d" (fun x -> ...)
could do the input.
And using an Hashtbl.t
is sensible in Ocaml.
Upvotes: 1