Itay Marom
Itay Marom

Reputation: 843

Python: Why is my generator based range is X2 slower than xrange?

Just of curiosity, I've written 3 tests in Python and timed them out using timeit:

import timeit

# simple range based on generator
def my_range(start, stop):
    i = start
    while (i < stop):
        yield i
        i += 1

# test regular range
def test_range():
    x = range(1, 100000)
    sum = 0

    for i in x:
        sum += i

# test xrange
def test_xrange():
    x = xrange(1, 100000)
    sum = 0

    for i in x:
        sum += i

# test my range
def test_my_range():
    x = my_range(1, 100000)
    sum = 0

    for i in x:
        sum += i


print timeit.timeit("test_range()", setup = "from __main__ import test_range", number = 100)
print timeit.timeit("test_xrange()", setup = "from __main__ import test_xrange", number = 100)
print timeit.timeit("test_my_range()", setup = "from __main__ import test_my_range", number = 100)

And I've got these benchmarks:

regular range based test - 0.616795163262

xrange based test - 0.537716731096

my_range (generator) based test - **1.27872886337**

My range was X2 slower even than a range that creates a list. Why?

  1. Are xrange() / range() implemented using C directly?

  2. Are they implemented without condition check?

Thanks!

Upvotes: 1

Views: 76

Answers (2)

Will
Will

Reputation: 24689

I feel that the simple answer is that xrange() is builtin and written in C.

I added another case to your test (see below): A pure-Python reference implementation of xrange() based on the CPython source.

import timeit

from collections import Sequence, Iterator
from math import ceil


# simple range based on generator
def my_range(start, stop):
    i = start
    while (i < stop):
        yield i
        i += 1

# test regular range
def test_range():
    x = range(1, 100000)
    sum = 0

    for i in x:
        sum += i

# test xrange
def test_xrange():
    x = xrange(1, 100000)
    sum = 0

    for i in x:
        sum += i

# test my range
def test_my_range():
    x = my_range(1, 100000)
    sum = 0

    for i in x:
        sum += i


class pure_python_xrange(Sequence):
    """Pure-Python implementation of an ``xrange`` (aka ``range``
    in Python 3) object. See `the CPython documentation
    <http://docs.python.org/py3k/library/functions.html#range>`_
    for details.
    """

    def __init__(self, *args):
        if len(args) == 1:
            start, stop, step = 0, args[0], 1
        elif len(args) == 2:
            start, stop, step = args[0], args[1], 1
        elif len(args) == 3:
            start, stop, step = args
        else:
            raise TypeError('pure_python_xrange() requires 1-3 int arguments')

        try:
            start, stop, step = int(start), int(stop), int(step)
        except ValueError:
            raise TypeError('an integer is required')


        if step == 0:
            raise ValueError('pure_python_xrange() arg 3 must not be zero')
        elif step < 0:
            stop = min(stop, start)
        else:
            stop = max(stop, start)

        self._start = start
        self._stop = stop
        self._step = step
        self._len = (stop - start) // step + bool((stop - start) % step)

    def __repr__(self):
        if self._start == 0 and self._step == 1:
            return 'pure_python_xrange(%d)' % self._stop
        elif self._step == 1:
            return 'pure_python_xrange(%d, %d)' % (self._start, self._stop)
        return 'pure_python_xrange(%d, %d, %d)' % (self._start, self._stop, self._step)

    def __eq__(self, other):
        return isinstance(other, xrange) and \
               self._start == other._start and \
               self._stop == other._stop and \
               self._step == other._step

    def __len__(self):
        return self._len

    def index(self, value):
        """Return the 0-based position of integer `value` in
        the sequence this xrange represents."""
        diff = value - self._start
        quotient, remainder = divmod(diff, self._step)
        if remainder == 0 and 0 <= quotient < self._len:
            return abs(quotient)
        raise ValueError('%r is not in range' % value)

    def count(self, value):
        """Return the number of ocurrences of integer `value`
        in the sequence this xrange represents."""
        # a value can occur exactly zero or one times
        return int(value in self)

    def __contains__(self, value):
        """Return ``True`` if the integer `value` occurs in
        the sequence this xrange represents."""
        try:
            self.index(value)
            return True
        except ValueError:
            return False

    def __reversed__(self):
        """Return an xrange which represents a sequence whose
        contents are the same as the sequence this xrange
        represents, but in the opposite order."""
        sign = self._step / abs(self._step)
        last = self._start + ((self._len - 1) * self._step)
        return pure_python_xrange(last, self._start - sign, -1 * self._step)

    def __getitem__(self, index):
        """Return the element at position ``index`` in the sequence
        this xrange represents, or raise :class:`IndexError` if the
        position is out of range."""
        if isinstance(index, slice):
            return self.__getitem_slice(index)
        if index < 0:
            # negative indexes access from the end
            index = self._len + index
        if index < 0 or index >= self._len:
            raise IndexError('xrange object index out of range')
        return self._start + index * self._step

    def __getitem_slice(self, slce):
        """Return an xrange which represents the requested slce
        of the sequence represented by this xrange.
        """
        start, stop, step = slce.start, slce.stop, slce.step
        if step == 0:
            raise ValueError('slice step cannot be 0')

        start = start or self._start
        stop = stop or self._stop
        if start < 0:
            start = max(0, start + self._len)
        if stop < 0:
            stop = max(start, stop + self._len)

        if step is None or step > 0:
            return pure_python_xrange(start, stop, step or 1)
        else:
            rv = reversed(self)
            rv._step = step
            return rv

    def __iter__(self):
        """Return an iterator which enumerates the elements of the
        sequence this xrange represents."""
        return xrangeiterator(self)

class xrangeiterator(Iterator):
    """An iterator for an :class:`xrange`.
    """

    def __init__(self, xrangeobj):
        self._xrange = xrangeobj

        # Intialize the "last outputted value" to the value
        # just before the first value; this simplifies next()
        self._last = self._xrange._start - self._xrange._step
        self._count = 0

    def __iter__(self):
        """An iterator is already an iterator, so return ``self``.
        """
        return self

    def next(self):
        """Return the next element in the sequence represented
        by the xrange we are iterating, or raise StopIteration
        if we have passed the end of the sequence."""
        self._last += self._xrange._step
        self._count += 1
        if self._count > self._xrange._len:
            raise StopIteration()
        return self._last


# test xrange
def test_pure_python_xrange():
    x = pure_python_xrange(1, 100000)
    sum = 0

    for i in x:
        sum += i


print timeit.timeit("test_range()", setup = "from __main__ import test_range", number = 100)
print timeit.timeit("test_xrange()", setup = "from __main__ import test_xrange", number = 100)
print timeit.timeit("test_my_range()", setup = "from __main__ import test_my_range", number = 100)
print timeit.timeit("test_pure_python_xrange()", setup = "from __main__ import test_pure_python_xrange", number = 100)

The results?

$ python so.py

0.426695823669

0.371111869812

0.964643001556

6.06390094757

This is simply the difference between interpreted Python code and C. Additionally, as @byels mentioned above, xrange() is limited to short integers, which likely has positive effect.

Upvotes: 4

inlinestyle
inlinestyle

Reputation: 127

This is an interesting test. Looking at the python 2 docs on xrange, one guess that comes to mind is that xrange is alowed to take advantage of type restrictions (only uses "short" integers)

Upvotes: 2

Related Questions