user8563312
user8563312

Reputation:

prime factorization with large numbers in python

whazzup,

Having the following problem, I can't get it fixed. Handling with numbers having a length around 5 - 52, I wan't to get their prime factors. Using Python, I found the following two algorithms:

I.

def faktorisiere(n):
    l = []  # Lösungsmenge
    # Auf Teilbarkeit durch 2, und alle ungeraden Zahlen von 3..n/2 testen
    for i in chain([2], range(3, n//2 + 1, 2)):
        # Ein Teiler kann mehrfach vorkommen (z.B. 4 = 2 * 2), deswegen:
        while n % i == 0:
            l.append(i)
            print(i)
            n = n // i  # "//" ist ganzzahlige Division und entspricht int(n/i)
        if i > n:  # Alle Teiler gefunden? Dann Abbruch.
            break
    print(l)

II.

x = input("Number: ")
mode = x[-1:]
x = int(x[:-1])
if int(x) < 1000000000000:
    faktorisiere(x)
else:
    if mode == 'f':
        faktorisiere(x)
    rx = int(sqrt(x)) + 1

    for i in range(1, rx+1):
        if mode == 'a':
            if x % i == 0:
                y = int(x/i)
                print(str(x), "=", i, "*", str(y))
        if mode == 'u':
            if i % 2 != 0:
                if x % i == 0:
                    y = int(x/i)
                    print(str(x), "=", i, "*", str(y))

But the first code is very slow computing numbers like these: 1917141215419419171412154194191714

The second is working a little faster, but I get wrong outputs and I can't find the mistake in code. As an example we take the given number. These are the first lines of pythons output:

But as you can see, these multiplications don't equal the result. Is there any mistake in the code? Or is it just because of the length of numbers? Hope you can help me.

Best wishes, TimeMen

Upvotes: 2

Views: 7936

Answers (2)

sh1
sh1

Reputation: 4763

Python 3 edits to @user448810's answer:

def isPrime(n, k=5): # miller-rabin
    from random import randint
    if n < 2: return False
    for p in [2,3,5,7,11,13,17,19,23,29]:
        if n % p == 0: return n == p
    s, d = 0, n-1
    while d % 2 == 0:
        s, d = s+1, d//2
    for i in range(k):
        x = pow(randint(2, n-1), d, n)
        if x == 1 or x == n-1: continue
        for r in range(1, s):
            x = (x * x) % n
            if x == 1: return False
            if x == n-1: break
        else: return False
    return True

def factors(n, b2=-1, b1=10000): # 2,3,5-wheel, then rho
    def gcd(a,b): # euclid's algorithm
        if b == 0: return a
        return gcd(b, a%b)
    def insertSorted(x, xs): # linear search
        i, ln = 0, len(xs)
        while i < ln and xs[i] < x: i += 1
        xs.insert(i,x)
        return xs
    if -1 <= n <= 1: return [n]
    if n < -1: return [-1] + factors(-n)
    wheel = [1,2,2,4,2,4,2,4,6,2,6]
    w, f, fs = 0, 2, []
    while f*f <= n and f < b1:
        while n % f == 0:
            fs.append(f)
            n //= f
        f, w = f + wheel[w], w+1
        if w == 11: w = 3
    if n == 1: return fs
    h, t, g, c = 1, 1, 1, 1
    while not isPrime(n):
        while b2 != 0 and g == 1:
            h = (h*h+c)%n # the hare runs
            h = (h*h+c)%n # twice as fast
            t = (t*t+c)%n # as the tortoise
            g = gcd(t-h, n); b2 -= 1
        if b2 == 0: return fs
        if isPrime(g):
            while n % g == 0:
                fs = insertSorted(g, fs)
                n //= g
        h, t, g, c = 1, 1, 1, c+1
    return insertSorted(n, fs)

print(factors(1917141215419419171412154194191714))

Upvotes: 0

user448810
user448810

Reputation: 17866

You need a better algorithm than trial division to factor numbers the size you are working with. A simple step up from trial division is John Pollard's rho algorithm:

Python 2.7.13 (default, Mar 13 2017, 20:56:15)
[GCC 5.4.0] on cygwin
Type "help", "copyright", "credits" or "license" for more information.
>>> def isPrime(n, k=5): # miller-rabin
...     from random import randint
...     if n < 2: return False
...     for p in [2,3,5,7,11,13,17,19,23,29]:
...         if n % p == 0: return n == p
...     s, d = 0, n-1
...     while d % 2 == 0:
...         s, d = s+1, d/2
...     for i in range(k):
...         x = pow(randint(2, n-1), d, n)
...         if x == 1 or x == n-1: continue
...         for r in range(1, s):
...             x = (x * x) % n
...             if x == 1: return False
...             if x == n-1: break
...         else: return False
...     return True
...
>>> def factors(n, b2=-1, b1=10000): # 2,3,5-wheel, then rho
...     def gcd(a,b): # euclid's algorithm
...         if b == 0: return a
...         return gcd(b, a%b)
...     def insertSorted(x, xs): # linear search
...         i, ln = 0, len(xs)
...         while i < ln and xs[i] < x: i += 1
...         xs.insert(i,x)
...         return xs
...     if -1 <= n <= 1: return [n]
...     if n < -1: return [-1] + factors(-n)
...     wheel = [1,2,2,4,2,4,2,4,6,2,6]
...     w, f, fs = 0, 2, []
...     while f*f <= n and f < b1:
...         while n % f == 0:
...             fs.append(f)
...             n /= f
...         f, w = f + wheel[w], w+1
...         if w == 11: w = 3
...     if n == 1: return fs
...     h, t, g, c = 1, 1, 1, 1
...     while not isPrime(n):
...         while b2 <> 0 and g == 1:
...             h = (h*h+c)%n # the hare runs
...             h = (h*h+c)%n # twice as fast
...             t = (t*t+c)%n # as the tortoise
...             g = gcd(t-h, n); b2 -= 1
...         if b2 == 0: return fs
...         if isPrime(g):
...             while n % g == 0:
...                 fs = insertSorted(g, fs)
...                 n /= g
...         h, t, g, c = 1, 1, 1, c+1
...     return insertSorted(n, fs)
...
>>> factors(1917141215419419171412154194191714)
[2, 3, 13, 449941L, 54626569996995593878495243L]

The factorization is returned immediately. See here for an explanation of how it works. To factor even larger numbers, you will need to look at algorithms like the elliptic curve method or the quadratic sieve, but beware that both those algorithms are complicated.

Upvotes: 3

Related Questions