from cassandra.cluster import Cluster

# Connect to a Cassandra node and open a session against the 'test' keyspace
cluster = Cluster(['192.168.88.186'])
session = cluster.connect('test')

rows = session.execute('select * from testing123 limit 5;')
for row in rows:
    print(row.id)
1
2
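For context, the two rows read back above were inserted ahead of time. A minimal sketch of seeding the table with the same driver, assuming a schema of (id int PRIMARY KEY, city text, name text), which is not shown in the original:

# Prepared statements are parsed once by Cassandra and reused,
# which is faster and safer than interpolating values into CQL strings.
insert = session.prepare(
    'INSERT INTO testing123 (id, city, name) VALUES (?, ?, ?)'
)
session.execute(insert, (1, 'Bay Area', 'Amanda'))
session.execute(insert, (2, 'NYC', 'Toby'))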
import pandas as pd

# Pull the full table and load it into a pandas DataFrame
rows = session.execute('select * from testing123;')
df = pd.DataFrame(list(rows))
df
|   | id | city     | name   |
|---|----|----------|--------|
| 0 | 1  | Bay Area | Amanda |
| 1 | 2  | NYC      | Toby   |
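Passing list(rows) to the DataFrame constructor works because the driver returns each row as a named tuple, so pandas picks up the column names automatically. If you prefer explicit dictionaries, the driver also ships a dict_factory row factory; a small sketch (depending on driver version, this may need to be set through an execution profile instead):

from cassandra.query import dict_factory

# Ask the driver to return each row as a dict instead of a named tuple
session.row_factory = dict_factory
rows = session.execute('select * from testing123;')
df = pd.DataFrame(list(rows))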
from pyspark.sql import SparkSession

# Assumes the spark-cassandra-connector is already on the cluster's classpath
spark = SparkSession.builder.appName('test').master("spark://192.168.88.186:7077").getOrCreate()

df = (spark.read.format("org.apache.spark.sql.cassandra")
      .option("spark.cassandra.connection.host", "192.168.88.186")
      .options(table="testing123", keyspace="test")
      .load())
print ("Table Row Count: ")
print (df.count())
23/07/13 18:47:37 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
Table Row Count:
2
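The connector can also push simple predicates down to Cassandra, so filtering happens server-side rather than after a full table scan. A short sketch using the same DataFrame, assuming id is the partition key:

# Equality filters on the partition key are pushed down to Cassandra,
# so only matching rows are shipped to Spark
df.filter(df.id == 1).show()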
df.show()
+---+--------+------+
| id|    city|  name|
+---+--------+------+
|  2|     NYC|  Toby|
|  1|Bay Area|Amanda|
+---+--------+------+
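Writing back goes through the same data source. A minimal sketch, assuming a target table testing123_copy with the same schema already exists in the test keyspace:

# Append the DataFrame's rows into another Cassandra table via the connector
(df.write.format("org.apache.spark.sql.cassandra")
   .options(table="testing123_copy", keyspace="test")
   .mode("append")
   .save())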
spark.stop()