Reputation: 87
I'm try to run some insert queries on Apache Cassandra using Python. I want to insert the data from a json file, here's my code:
import logging
from cassandra.cluster import Cluster
import json
logging.basicConfig(level=logging.INFO)
def connect_db():
"""Func to connect to cassandra db"""
cluster = Cluster(['127.0.0.1'], port=9042)
session = cluster.connect()
# session.execute("DROP TABLE player_session.events")
# session.execute("DROP TABLE player_session.startevents ")
# session.execute("DROP TABLE player_session.endevents ")
return session
def execute_query():
"""Func to execute query in cassandra """
session = connect_db()
print("Creating KEYSPACE")
session.execute("""
CREATE KEYSPACE IF NOT EXISTS player_session
WITH REPLICATION =
{ 'class' : 'NetworkTopologyStrategy', 'data_center' : 1 }
""")
print("Creating player_session table")
session.execute("""
CREATE TABLE IF NOT EXISTS
player_session.events(player_id text, country text, event text, session_id text,ts timestamp,
PRIMARY KEY(player_id, ts)) WITH CLUSTERING ORDER BY ("ts" DESC)
""")
print("Creating start session table")
session.execute("""
CREATE TABLE IF NOT EXISTS
player_session.startevents(player_id text, country text, event text, session_id text,ts timestamp,
PRIMARY KEY(player_id, ts)) WITH CLUSTERING ORDER BY ("ts" DESC)
""")
print("Creating end session table")
session.execute("""
CREATE TABLE IF NOT EXISTS
player_session.endevents(player_id text, country text, event text, session_id text,ts timestamp,
PRIMARY KEY(player_id, ts)) WITH CLUSTERING ORDER BY ("ts" DESC)
""")
return session
def insert_data(session):
"""Func to insert json data """
with open('my_json.jsonl') as f:
data = f.readlines()
for row in data:
row = json.loads(row)
if row['event'] == "start":
session.execute(
"INSERT INTO player_session.startevents (player_id, event, country, session_id, ts) VALUES (%s,%s,%s,%s,%s) ",
[row['player_id'], row['event'], row['country'], row['session_id'], row['ts']]
)
if row['event'] == "end":
session.execute(
"INSERT INTO player_session.endevents (player_id, event, session_id, ts) VALUES (%s,%s,%s,%s) ",
[row['player_id'], row['event'], row['session_id'], row['ts']]
)
f.close()
print("data import complete")
if __name__ == "__main__":
session = connect_db()
insert_data(session)
My tables are created in Cassandra but I will always get this error:
Traceback (most recent call last):
line 64, in insert_data
session.execute(
File "cassandra/cluster.py", line 2618, in cassandra.cluster.Session.execute
File "cassandra/cluster.py", line 4894, in cassandra.cluster.ResponseFuture.result
cassandra.cluster.NoHostAvailable: ('Unable to complete the operation against any hosts', {<Host: 127.0.0.1:9042 datacenter1>: Unavailable('Error from server: code=1000 [Unavailable exception] message="Cannot achieve consistency level LOCAL_ONE" info={\'consistency\': \'LOCAL_ONE\', \'required_replicas\': 1, \'alive_replicas\': 0}')})
Upvotes: 1
Views: 509
Reputation: 57748
The error message suggests two possibilities:
Cassandra is not running. You can verify this with nodetool status
.
The data center name you are using to connect is incorrect, either in the keyspace definition or in the cluster config. The default name for the data center is dc1
. Whatever this is set to, it must match the data center name as shown in nodetool status
, describe keyspace player_session
, and the data center name specified in your connection properties (optional).
Upvotes: 1