z_axis
z_axis

Reputation: 8460

How to convert such a lisp function to ocaml?

The function "good-red" used to calculate the 18 highest frequency numbers from a file ssqHitNum.txt.

(defun good-red ()
(let ((tab (make-hash-table)) (res '()) (nums) (sort-res))
    (dotimes (i 33) (setf (gethash (+ i 1) tab) 0))
    (with-open-file (stream "ssqHitNum.txt")
        (loop :for line = (read-line stream nil)
             :until (null line)
             :do
                (setq nums (butlast (str2lst (subseq line 6))))
                (dolist (n nums) (incf (gethash n tab)))
                ))
    (maphash #'(lambda (k v) (push (cons k v) res)) tab)
    (setq sort-res (sort res #'> :key #'cdr))
    ;(print sort-res)
    (subseq (mapcar #'car sort-res) 0 18)))

$head ssqHitNum.txt

10000 7 12 18 19 22 28 4
10000 16 17 23 26 31 32 11
10000 3 4 18 22 24 29 11
10000 4 9 10 18 29 32 8
10000 5 7 10 14 17 25 11

The number is between 1 and 33. Whether or not i use a hashtab and scan the file line by line as the Common Lisp code does in ocaml ? Or there is more elegant way using ocaml ?

Any suggestion is appreciated !

Upvotes: 2

Views: 969

Answers (5)

J D
J D

Reputation: 48687

FWIW, in F# 3.0 you can just write it like this:

System.IO.File.ReadLines @"ssqHitNum.txt"
|> Seq.collect (fun s -> s.Split ' ' |> Seq.skip 1)
|> Seq.countBy id
|> Seq.sortBy (fun (_, p) -> -p)
|> Seq.take 18

In OCaml, I'd start by writing these useful library functions from scratch:

let readAllLines file =
  let lines = ref [] in
  let input = open_in file in
  begin
    try
      while true do
        lines := input_line input :: !lines
      done
    with
    | End_of_file ->
        close_in input
  end;
  List.rev !lines

let collect f xs =
  List.concat (List.map f xs)

let countBy f xs =
  let counts = Hashtbl.create 100 in
  let find key = try Hashtbl.find counts (f key) with Not_found -> 0 in
  let add key = Hashtbl.replace counts (f key) (1 + find key) in
  List.iter add xs;
  Hashtbl.fold (fun k n kns -> (k, n)::kns) table []

let sortBy f xs =
  List.sort (fun x y -> compare (f x) (f y)) xs

let rec truncate n xs =
  match n, xs with
  | 0, _ | _, [] -> []
  | n, x::xs -> x::truncate (n-1) xs

let rec skip n xs =
  match n, xs with
  | 0, xs -> xs
  | n, [] -> []
  | n, _::xs -> skip (n-1) xs

let (|>) x f = f x

let id x = x

and then write it the same way:

readAllLines "ssqHitNum.txt"
|> collect (fun s -> split ' ' s |> skip 1)
|> countBy id
|> sortBy (fun (_, p) -> -p)
|> truncate 18

The F# is still better because the lines are read on-demand whereas my OCaml reads everything into memory up-front.

Upvotes: 13

Toby Kelsey
Toby Kelsey

Reputation: 166

With a fixed set of small integers it may be simpler to use an array:

   let good_red () =
      let a = Array.make 33 0 in
      let bump i = a.(i-1) <- a.(i-1) + 1 in
      let rec iter_lines fh =
         try
            let words = Str.split (Str.regexp " +") (input_line fh) in
            List.iter bump (List.map int_of_string (List.tl words));
            iter_lines fh
         with End_of_file -> () in
      let fh = open_in "ssqHitNum.txt" in
      iter_lines fh;
      close_in fh;
      let b = Array.mapi (fun i freq -> (i+1,freq)) a in
      Array.sort (fun (i1,f1) (i2,f2) -> compare f2 f1) b;
      Array.sub b 0 18;;

   try
   Array.iter (fun (i,freq) -> Printf.printf "%2d %2d\n" freq i) (good_red ())
   with Invalid_argument _ -> print_endline "bad input"

As gasche mentions you need to compile with str.cma or str.cmxa.

Upvotes: 2

gasche
gasche

Reputation: 31459

As asked by z_axis, here is another solution using only the base library distributed with the OCaml compiler. It is a bit more verbose due to the absence of some convenience functions.

let freq file best_n =
  let table = Hashtbl.create 100 in
  let freq_num num =
    Hashtbl.replace table num
      (1 + try Hashtbl.find table num with Not_found -> 0) in
  begin
    let input = open_in file in
    try while true do
        let line = input_line input in
        let nums = List.tl (Str.split (Str.regexp " +") line) in
        List.iter freq_num nums
    done with End_of_file -> close_in input
  end;
  let sorted =
    let cmp (_,freq1) (_,freq2) = (* decreasing *) compare freq2 freq1 in
    List.sort cmp (Hashtbl.fold (fun k x li -> (k,x)::li) table []) in
  (* take not tail-rec, not a problem for small n such as n=18 *)
  let rec take n = function
    | li when n = 0 -> []
    | [] -> []
    | hd::tl -> hd :: take (n - 1) tl in
  take best_n sorted

The regexp module Str is not linked by default, even if it is in the default search path, so you must explicitly compile the program with str.cma (for ocamlc) or str.cmxa (for ocamlopt). In the toplevel, #use "topfind";; then #require "str";; will do.

Upvotes: 3

gasche
gasche

Reputation: 31459

From a similarly high-level point of view, using Batteries:

open Batteries_uni

let freq file best_n =
  let table = Hashtbl.create 100 in
  let freq_num num =
    Hashtbl.replace table num (1 + Hashtbl.find_default table num 0) in
  let freq_line line =
    let nums = List.tl (String.nsplit line " ") in
    List.iter freq_num nums in
  Enum.iter freq_line (File.lines_of file);
  let cmp (_,freq1) (_,freq2) = (* decreasing *) compare freq2 freq1 in
  Hashtbl.enum table |> List.of_enum
    |> List.sort ~cmp
    |> List.take best_n

To test, from the toplevel:

#use "topfind";;
#require "batteries";;
#use "/tmp/test.ml";;
test "/tmp/test.txt" 18;;

Upvotes: 4

I'm not sure to understand the problem you want to solve. (Why all your input lines start with 10000?)

If you just want to find the 18-th highest frequency numbers, you don't need to read line by line (and this is true in Lisp, in C, in Ocaml, ...), and Ocaml's Scanf.scanf "%d" (fun x -> ...) could do the input.

And using an Hashtbl.t is sensible in Ocaml.

Upvotes: 1

Related Questions