Reputation: 810
Is it possible to use SQLite in AWS EFS safely? In my readings trying to determine if this is viable there appears to be some allusions that it should be doable since AWS EFS implemented NFSv4 back in 2017. In practice I am having no luck getting consistent behavior out of it.
Quick Points:
Current results are very inconsistent with 3 exceptions encountered irrespective of approach I use
database code:
SQLITE_VAR_LIMIT = 999
dgm_db_file_name = ''
db = SqliteExtDatabase(None)
lock_file = f'{os.getenv("efs_path", "tmp")}/db_lock_file.lock'
def lock_db_file():
with open(lock_file, 'w+') as lock:
limit = 900
while limit:
try:
fcntl.flock(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
print(f'db locked')
break
except Exception as e:
print(f'Exception: {str(e)}')
limit -= 1
time.sleep(1)
if not limit:
raise ValueError(f'Timed out after 900 seconds while waiting for database lock.')
def unlock_db_file():
with open(lock_file, 'w+') as lock:
fcntl.flock(lock, fcntl.LOCK_UN)
print(f'db unlocked')
def initialize_db(db_file_path=dgm_db_file_name):
print(f'Initializing db ')
global db
db.init(db_file_path, pragmas={
'journal_mode': 'wal',
'cache_size': -1 * 64000, # 64MB
'foreign_keys': 1})
print(f'db initialized')
class Thing(Model):
name = CharField(primary_key=True)
etag = CharField()
last_modified = CharField()
class Meta:
database = db
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@staticmethod
def insert_many(stuff):
data = [(k, v['ETag'], v['Last-Modified']) for k, v in stuff.items()]
fields = [Thing.name, Thing.etag, Thing.last_modified]
limit = 900
while True:
try:
with db.atomic():
for key_batch in chunked(data, SQLITE_VAR_LIMIT // len(fields)):
s = Thing.insert_many(key_batch, fields=[Thing.name, Thing.etag, Thing.last_modified]) \
.on_conflict_replace().execute()
break
except Exception as e:
print(f'Exception: {str(e)}')
print(f'Will try for {limit} more seconds.')
limit -= 1
time.sleep(1)
if not limit:
raise ValueError('Failed to exectue query after 900 seconds.')
Example Call:
print(f'Critical section start')
# lock_db_file() # I have tried with a secondary lock file as well
self.stuff_db = Thing()
if not Path(self.db_file_path).exists():
initialize_db(self.db_file_path)
print('creating tables')
db.create_tables([Thing], safe=True)
else:
initialize_db(self.db_file_path)
getattr(Thing, insert_many)(self.stuff_db, stuff_db)
# db.close()
# unlock_db_file()
print(f'Critical section end')
print(f'len after update: {len(stuff)}')
Additional peculiarities:
Upvotes: 8
Views: 3626
Reputation: 810
After some trial and error I discovered it is a workable solution. It appears that the design will need to use APSWDatabase(..., vfs='unix-excl') to properly enforce locking.
Database code:
from peewee import *
from playhouse.apsw_ext import APSWDatabase
SQLITE_VAR_LIMIT = 999
db = APSWDatabase(None, vfs='unix-excl')
def initialize_db(db_file_path):
global db
db.init(db_file_path, pragmas={
'journal_mode': 'wal',
'cache_size': -1 * 64000})
db.create_tables([Thing], safe=True)
return Thing()
class Thing(Model):
field_1 = CharField(primary_key=True)
field_2 = CharField()
field_3 = CharField()
class Meta:
database = db
This allows for the following usage:
db_model = initialize_db(db_file_path)
with db:
# Do database queries here with the db_model
pass
Note: If you don't use the context managed database connection you will need to explicitly call db.close()
otherwise the lock will not be released from the file. Additionally, calling db_init(...)
causes a lock to be placed on the databased until it is closed.
Upvotes: 6