Michael Locher
Michael Locher

Reputation: 165

Creating an endless Iterator with a given distribution

Given a java.util.Collection what is the easiest way to create an endless java.util.Iterator which returns those elements such that they show up according to a given distribution (org.apache.commons.math.distribution)?

Upvotes: 2

Views: 462

Answers (2)

Michael Locher
Michael Locher

Reputation: 165

Solution for Gaussian distribution

import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.SortedMap;
import java.util.Map.Entry;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.ImmutableSortedMap.Builder;

/**
 * Endless sequence with gaussian distribution.
 * 
 * @param <T> the type of the elements
 * @author Michael Locher
 */
public class GaussianSequence<T> implements Iterable<T>, Iterator<T> {

  private static final int HISTOGRAMM_SAMPLES = 50000;

  private static final int HISTOGRAMM_ELEMENTS = 100;

  private static final int HISTOGRAMM_LENGTH = 80;

  private static final double DEFAULT_CUTOFF = 4.0;

  private final List<T> elements;
  private final int maxIndex;
  private final Random rnd;
  private final double scaling;
  private final double halfCount;

  /**
   * Creates this.
   * @param rnd the source of randomness to use
   * @param elements the elements to deliver
   */
  public GaussianSequence(final Random rnd, final Collection<T> elements) {
    this(rnd, DEFAULT_CUTOFF, elements);
  }

  private GaussianSequence(final Random rnd, final double tailCutOff, final Collection<T> elements) {
    super();
    this.rnd = rnd;
    this.elements = new ArrayList<T>(elements);
    if (this.elements.isEmpty()) {
      throw new IllegalArgumentException("no elements provided");
    }
    this.maxIndex = this.elements.size() - 1;
    this.halfCount = this.elements.size() / 2.0;
    this.scaling = this.halfCount / tailCutOff;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public Iterator<T> iterator() {
    return this;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean hasNext() {
    return true;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public T next() {
    return this.elements.get(sanitizeIndex(determineNextIndex()));
  }

  private int determineNextIndex() {
    final double z = this.rnd.nextGaussian();
    return (int) (this.halfCount + (this.scaling * z));
  }

  private int sanitizeIndex(final int index) {
    if (index < 0) {
      return 0;
    }
    if (index > this.maxIndex) {
      return this.maxIndex;
    }
    return index;
  }

  /**
   * Prints a histogramm to stdout.
   * @param args not used
   */
  public static void main(final String[] args) {
    final PrintWriter out = new PrintWriter(new OutputStreamWriter(System.out, Charset.forName("UTF-8")), true);
    plotHistogramm(new Random(), out);
  }

  private static void plotHistogramm(final Random rnd, final PrintWriter out) {
    // build elements
    final Multimap<Integer, Integer> results = ArrayListMultimap.create();
    final List<Integer> elements = Lists.newArrayListWithCapacity(HISTOGRAMM_ELEMENTS);
    for (int i = 1; i < HISTOGRAMM_ELEMENTS; i++) {
      elements.add(i);
    }
    // sample sequence
    final Iterator<Integer> randomSeq = new GaussianSequence<Integer>(rnd, elements);
    for (int j = 0; j < HISTOGRAMM_SAMPLES; j++) {
      final Integer sampled = randomSeq.next();
      results.put(sampled, sampled);
    }
    // count and sort results
    final Builder<Integer, Integer> r = ImmutableSortedMap.naturalOrder();
    for (final Entry<Integer, Collection<Integer>> e : results.asMap().entrySet()) {
      final int count = e.getValue().size();
      r.put(e.getKey(), count);
    }
    // plot results
    final SortedMap<Integer, Integer> sortedAndCounted = r.build();
    final double histogramScale = (double) HISTOGRAMM_LENGTH / Collections.max(sortedAndCounted.values());
    for (final Entry<Integer, Integer> e : sortedAndCounted.entrySet()) {
      out.format("%3d [%4d]", e.getKey(), e.getValue());
      final StringBuilder c = new StringBuilder();
      final int lineLength = (int) (histogramScale * e.getValue());
      for (int i = 0; i < lineLength; i++) {
        c.append('*');
      }
      out.println(c);
    }
  }

}

Upvotes: 1

oxbow_lakes
oxbow_lakes

Reputation: 134330

List<Object> l = new ArrayList<Object>(coll);
Iterator<Object> i = new Iterator<Object>() {
    public boolean hasNext() { return true; }

    public Object next() {
        return coll.get(distribution.nextInt(0, l.size());
    }
} 

Your problem is then how to convert the Distribution classes in the apache library to implement the nextInt method. I have to say that it is far from obvious to me how you can actually do this from the Distribution interface.

One (slightly rubbish) way I can think of is to generate an EmpiricalDistribution (in the random package) dataset using the probability defined by your actual distribution and then using that emprirical dsitribution as the distribution (above)

Upvotes: 4

Related Questions