pedrommuller
pedrommuller

Reputation: 16066

Importing 5k+ rows in odoo 12 gives me timeout

I'm trying to import 5000+ rows in Odoo 12 it's basically a mapping from a CSV developed in a custom method in a module, the problem I'm getting timeout in the request, that's happening when writing to the database, I'm using the standard ERP methods create and write.

How can I work around a solution to this? I know bulk insert is not possible to this, any other solution to this?

is a SQL command for insertion OK to use?

class file_reader(models.TransientModel):
_name = "rw.file.reader"
csv_file = fields.Binary(string='CSV File', required=True)

@api.multi
def import_csv(self):
    # csv importer handler
    file = base64.b64decode(self.csv_file).decode().split('\n')
    reader = csv.DictReader(file)
    # account.analytic.line
    ignored = []
    time1 = datetime.now()
            
    self._cr.execute('select id, name from project_project where active = true')
    projects = self._cr.fetchall()
    
    self._cr.execute('select id, login from res_users')
    users = self._cr.fetchall()
    
    self._cr.execute('select id, work_email from hr_employee')
    employees = self._cr.fetchall()
    LOG_EVERY_N = 100

    for row in reader:
        project_name = row['Project - Name']
        email = row['User - Email Address']
        project = [item for item in projects if item[1] == project_name]
        
        if len(project) >0:
            user = [item for item in users if item[1] == email]
            employee = [item for item in employees if item[1] == email]
                            
            if len(user)>0 and len(employee)>0:
                task = self.env['project.task'].search([['user_id','=',user[0][0]],
                                                        ['project_id','=',project[0][0] ]],limit=1)
                if task:
                    y = row['Duration'].split(':')   
                    i, j = y[0], y[1]  
                    
                    model = {
                        'project_id': project[0][0],
                        'task_id': task['id'],
                        'employee_id':employee[0][0],
                        'user_id': user[0][0],
                        'date': row['Date'],
                        'unit_amount': int(i) + (float(j) / 60), # Time Spent convertion to float
                        'is_timesheet': True,
                        'billable':  True if row['Billable'] == 'Yes' else False,
                        'nexonia_id':row['ID']
                        }
                    
                    time_sheet = self.env['account.analytic.line'].search([['nexonia_id','=', row['ID']]],limit=1)
                    
                    if time_sheet:
                        model.update({'id':time_sheet.id})
                        self.env['account.analytic.line'].sudo().write(model)
                    else:
                        self.env['account.analytic.line'].sudo().create(model)
            else:
                if email not in ignored:
                    ignored.append(email)
        else:
            if project_name not in ignored:
                ignored.append(project_name)
        
        all_text = 'Nothing ignored'
        if ignored is not None:
            all_text = "\n".join(filter(None, ignored))
            
        message_id = self.env['message.wizard'].create({
            'message': "Import data completed",
            'ignored': all_text
        })
    
    time2 = datetime.now()
    logging.info('total time ------------------------------------------ %s',time2-time1)
    
    return {
        'name': 'Successfull',
        'type': 'ir.actions.act_window',
        'view_mode': 'form',
        'res_model': 'message.wizard',
        # pass the id
        'res_id': message_id.id,
        'target': 'new'
    }

Upvotes: 1

Views: 1224

Answers (2)

OmaL
OmaL

Reputation: 5044

While you are importing records through script, code optimization is very important.Try to reduce the number of search/read calls by using dictionary to save each result or use the SQL which i don't recommend.

Upvotes: 0

Charif DZ
Charif DZ

Reputation: 14751

I Enhanced your code a litle bit because you are searching for each project, user and employee using loop for each row and for 5000+ row. Using ORM method is always good because, they handle the stored compute fields and python constrains, but this will take time too if you don't have any complex compute you can use INSERT or UPDATE query this will speed up the importion 100 times.

@api.multi
def import_csv(self):
    # when you use env[model] for more than ones extract it to variable its better
    # notice how I added sudo to the name of variable
    AccountAnalyticLine_sudo =self.env['account.analytic.line'].sudo()

    # csv importer handler
    file = base64.b64decode(self.csv_file).decode().split('\n')
    reader = csv.DictReader(file)
    # account.analytic.line
    ignored = []
    
    time1 = datetime.now()

    # convert result to dictionary for easy access later
    self._cr.execute('select id, name from project_project where active = true order by name')
    projects = {p[1]: p for p in self._cr.fetchall()}
    
    self._cr.execute('select id, login from res_users order by login')
    users = {u[1]: u for u in self._cr.fetchall()}
    
    self._cr.execute('select id, work_email from hr_employee order by work_email')
    employees = {emp[1]: emp for emp in self._cr.fetchall()}
    LOG_EVERY_N = 100

    for row in reader:
        project_name = row['Project - Name']
        email = row['User - Email Address']
        # no need for loop and the dicionary loopkup is so fast
        project = projects.get(project_name)
        
        if project:
            user = user.get(email)
            employee = employees.get(email)
                            
            if user and employee:
                task = self.env['project.task'].search([('user_id','=',user[0]),
                                                        ('project_id','=',project[0])],
                                                        limit=1)
                if task:
                    y = row['Duration'].split(':')   
                    i, j = y[0], y[1]  
                    
                    # by convention dictionary that are passed to create or write should be named vals or values
                    vals = {
                        'project_id': project[0],
                        'task_id': task['id'],
                        'employee_id':employee[0],
                        'user_id': user[0],
                        'date': row['Date'],
                        'unit_amount': int(i) + (float(j) / 60), # Time Spent convertion to float
                        'is_timesheet': True,
                        'billable':  True if row['Billable'] == 'Yes' else False,
                        'nexonia_id':row['ID']
                        }
                    
                    time_sheet = AccountAnalyticLine_sudo.search([('nexonia_id','=', row['ID'])],limit=1)
                    # I think adding logger message here will be or create and update counters to know how much record record were updated or created
                    if time_sheet:
                        # I think you want to update the existing time sheet record so do this
                        # record.write(vals)
                        time_sheet.write(vals)
                        
                        # you are updating an empty RecordSet 
                        #self.env['account.analytic.line'].sudo().write(model)
                    else:
                        # create new one
                        AccountAnalyticLine_sudo.create(model)
            else:
                if email not in ignored:
                    ignored.append(email)
        else:
            if project_name not in ignored:
                ignored.append(project_name)
        
        all_text = 'Nothing ignored'
        # ignored is not None is always True because ignored is a list
        if ignored:
            all_text = "\n".join(filter(None, ignored))
            
        message_id = self.env['message.wizard'].create({
            'message': "Import data completed",
            'ignored': all_text
        })
    
    time2 = datetime.now()
    logging.info('total time ------------------------------------------ %s',time2-time1)
    
    return {
        'name': 'Successfull',
        'type': 'ir.actions.act_window',
        'view_mode': 'form',
        'res_model': 'message.wizard',
        # pass the id
        'res_id': message_id.id,
        'target': 'new'
    }
    
    

I hope this will help you a little bit even that the question is meant for somethinng else but I'm confused Odoo usually allow request to be handled for 60 minutes.

Upvotes: 1

Related Questions