In [1]:
# Python driver for Cassandra, plus PySpark
import cassandra
import pyspark
In [2]:
from cassandra.cluster import Cluster

# cluster = Cluster(['192.168.126.45'])
cluster = Cluster(['192.168.88.186'])
session = cluster.connect('test')  # connect to the 'test' keyspace

rows = session.execute('SELECT * FROM testing123 LIMIT 5;')

for row in rows:
    print(row.id)
1
2
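
For repeated lookups, values are better bound through a prepared statement than interpolated into the CQL string. A minimal sketch, assuming id is the partition key of testing123 (which the data above suggests):

In [ ]:
# Prepared statements are parsed once server-side and reused;
# bound values travel separately, which also avoids CQL injection.
lookup = session.prepare('SELECT * FROM testing123 WHERE id = ?')
print(session.execute(lookup, [1]).one().name)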
In [3]:
import pandas as pd

rows = session.execute('SELECT * FROM testing123;')

# Driver rows are namedtuples, so pandas picks up the column names.
df = pd.DataFrame(list(rows))
df
Out[3]:
   id      city    name
0   1  Bay Area  Amanda
1   2       NYC    Toby
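
pd.DataFrame(list(rows)) materializes the whole result set at once, which is fine for two rows. For larger tables the driver can page through results instead; a sketch, where process() is a hypothetical per-row handler:

In [ ]:
from cassandra.query import SimpleStatement

# fetch_size is a paging hint: the driver pulls rows in pages
# and iterating the ResultSet transparently requests the next page.
stmt = SimpleStatement('SELECT * FROM testing123', fetch_size=1000)
for row in session.execute(stmt):
    process(row)  # hypothetical handler; replace with real logic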
In [4]:
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName('test').master("spark://192.168.88.186:7077").getOrCreate()

# Read the table through the Spark-Cassandra connector.
df = (spark.read.format("org.apache.spark.sql.cassandra")
      .option("spark.cassandra.connection.host", "192.168.88.186")
      .options(table="testing123", keyspace="test")
      .load())

print("Table Row Count: ")
print(df.count())
23/07/13 18:47:37 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
Table Row Count: 
2
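
Once loaded, the Cassandra-backed DataFrame can also be exposed to Spark SQL through a temporary view. A minimal sketch (the view name is arbitrary):

In [ ]:
# Register the DataFrame as a temp view and query it with SQL.
# The connector prunes columns and can push simple predicates down.
df.createOrReplaceTempView("testing123")
spark.sql("SELECT city, name FROM testing123 WHERE id = 1").show()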
In [5]:
df.show()
+---+--------+------+
| id|    city|  name|
+---+--------+------+
|  2|     NYC|  Toby|
|  1|Bay Area|Amanda|
+---+--------+------+
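
The connector also writes DataFrames back to Cassandra. A sketch that appends these rows to a second table, where testing123_copy is a hypothetical target that would already need to exist in the test keyspace with the same schema:

In [ ]:
# Append the DataFrame's rows to an existing Cassandra table.
(df.write.format("org.apache.spark.sql.cassandra")
   .options(table="testing123_copy", keyspace="test")
   .mode("append")
   .save())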

In [6]:
spark.stop()
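
The Cassandra driver connection opened in In [2] is still alive; it can be released the same way:

In [ ]:
# Close the driver's sessions and connection pools.
cluster.shutdown()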