Reputation: 33
I'm developing a Python app which uses the Django ORM as a standalone component to manage a database, but I'm facing a big memory issue. I've found that the part causing the issue is:
ports_list_save = []
for host in results['hosts']:
    for protocol in results['hosts'][host]['protocols']:
        for port in results['hosts'][host]['protocols'][protocol]:
            current_port = history.Port(number=int(port),
                                        protocol=protocol,
                                        state=results['hosts'][host]['protocols'][protocol][port]['state'],
                                        service='',
                                        version='',
                                        address=history.Ip.objects.get(scan=self.scan, address=host))
            ports_list_save.append(current_port)
history.Port.objects.bulk_create(ports_list_save)
This part worked fine with 154 hosts and 150 ports per host (about 23,000 objects to save), but now I'm trying it with 1,000 ports per host and my computer's memory blows up every time.
One more thing: I'm NOT running Django in debug mode, so the memory isn't being held by django.db.backends.postgresql_psycopg2.base.DatabaseWrapper.
Upvotes: 0
Views: 902
Reputation: 8305
I faced the same problem and ended up with this solution:
class BulkCreateManager(object):
    """Buffers model instances and bulk_create()s them in chunks."""
    model = None
    chunk_size = None
    instances = None

    def __init__(self, model, chunk_size=None, *args):
        self.model = model
        self.chunk_size = chunk_size
        self.instances = []

    def append(self, instance):
        # Once the buffer grows past chunk_size, flush it to the database
        # and start a new one, so only one chunk is ever held in memory.
        if self.chunk_size and len(self.instances) > self.chunk_size:
            self.create()
            self.instances = []
        self.instances.append(instance)

    def create(self):
        self.model.objects.bulk_create(self.instances)
ports_list_save = BulkCreateManager(history.Port, 23000)
for host in results['hosts']:
    for protocol in results['hosts'][host]['protocols']:
        for port in results['hosts'][host]['protocols'][protocol]:
            current_port = history.Port(number=int(port),
                                        protocol=protocol,
                                        state=results['hosts'][host]['protocols'][protocol][port]['state'],
                                        service='',
                                        version='',
                                        address=history.Ip.objects.get(scan=self.scan, address=host))
            ports_list_save.append(current_port)
ports_list_save.create()  # flush whatever is left in the buffer
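A note on the design, not part of the original answer: the trailing ports_list_save.create() is what flushes the last partial chunk, and it is easy to forget. A minimal sketch of an alternative, assuming the BulkCreateManager above, wraps it in a context manager so the final flush happens automatically when the with-block exits:

class BulkCreateManagerContext(BulkCreateManager):
    # Sketch only: same buffering behaviour as the answer, but the
    # remaining instances are flushed automatically on a clean exit.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None and self.instances:
            self.create()

The loop body stays exactly as in the answer; only the surrounding statement changes to "with BulkCreateManagerContext(history.Port, 23000) as ports_list_save:", and the explicit create() call at the end can be dropped.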
Upvotes: 0
Reputation: 43832
If you have a lot of data, you may still need to load and process it in chunks. Try this:
CHUNK_SIZE = 23000

ports_list_save = []
for host in results['hosts']:
    for protocol in results['hosts'][host]['protocols']:
        for port in results['hosts'][host]['protocols'][protocol]:
            current_port = history.Port(number=int(port),
                                        protocol=protocol,
                                        state=results['hosts'][host]['protocols'][protocol][port]['state'],
                                        service='',
                                        version='',
                                        address=history.Ip.objects.get(scan=self.scan, address=host))
            ports_list_save.append(current_port)
            if len(ports_list_save) > CHUNK_SIZE:
                history.Port.objects.bulk_create(ports_list_save)
                ports_list_save = []

if ports_list_save:
    history.Port.objects.bulk_create(ports_list_save)
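A side note, not in the original answer: bulk_create also accepts a batch_size argument (since Django 1.5), which caps the number of rows sent per INSERT statement. It complements the manual chunking above rather than replacing it, because the Python list itself still has to be flushed in chunks to keep memory bounded:

# Assumes Django >= 1.5, where bulk_create takes batch_size.
# batch_size only limits the rows per INSERT statement; the list must
# still be emptied in chunks (as above) to bound Python-side memory.
history.Port.objects.bulk_create(ports_list_save, batch_size=1000)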
Upvotes: 1