In [1]:
# Python driver for Cassandra, plus PySpark
import cassandra
import pyspark
In [2]:
from cassandra.cluster import Cluster

# cluster = Cluster(['192.168.126.45'])
cluster = Cluster(['192.168.88.186'])
session = cluster.connect('test')  # connect to the 'test' keyspace

rows = session.execute('SELECT * FROM testing123 LIMIT 5;')

for row in rows:
    print(row.id)
1
2
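
For repeated lookups, values are better bound through a prepared statement than interpolated into the CQL string. A minimal sketch, assuming id is the partition key of testing123 (which the data above suggests):

In [ ]:
# Prepared statements are parsed once server-side and reused;
# bound values travel separately, which also avoids CQL injection.
lookup = session.prepare('SELECT * FROM testing123 WHERE id = ?')
print(session.execute(lookup, [1]).one().name)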
In [3]:
import pandas as pd

rows = session.execute('SELECT * FROM testing123;')

# Driver rows are namedtuples, so pandas picks up the column names.
df = pd.DataFrame(list(rows))
df
Out[3]:
   id      city    name
0   1  Bay Area  Amanda
1   2       NYC    Toby
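
pd.DataFrame(list(rows)) materializes the whole result set at once, which is fine for two rows. For larger tables the driver can page through results instead; a sketch, where process() is a hypothetical per-row handler:

In [ ]:
from cassandra.query import SimpleStatement

# fetch_size is a paging hint: the driver pulls rows in pages
# and iterating the ResultSet transparently requests the next page.
stmt = SimpleStatement('SELECT * FROM testing123', fetch_size=1000)
for row in session.execute(stmt):
    process(row)  # hypothetical handler; replace with real logic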
In [4]:
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName('test').master("spark://192.168.88.186:7077").getOrCreate()

# Read the table through the Spark-Cassandra connector.
df = (spark.read.format("org.apache.spark.sql.cassandra")
      .option("spark.cassandra.connection.host", "192.168.88.186")
      .options(table="testing123", keyspace="test")
      .load())

print("Table Row Count: ")
print(df.count())
23/07/13 18:47:37 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
Table Row Count: 
2
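
Once loaded, the Cassandra-backed DataFrame can also be exposed to Spark SQL through a temporary view. A minimal sketch (the view name is arbitrary):

In [ ]:
# Register the DataFrame as a temp view and query it with SQL.
# The connector prunes columns and can push simple predicates down.
df.createOrReplaceTempView("testing123")
spark.sql("SELECT city, name FROM testing123 WHERE id = 1").show()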
In [5]:
df.show()
+---+--------+------+
| id|    city|  name|
+---+--------+------+
|  2|     NYC|  Toby|
|  1|Bay Area|Amanda|
+---+--------+------+
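
The connector also writes DataFrames back to Cassandra. A sketch that appends these rows to a second table, where testing123_copy is a hypothetical target that would already need to exist in the test keyspace with the same schema:

In [ ]:
# Append the DataFrame's rows to an existing Cassandra table.
(df.write.format("org.apache.spark.sql.cassandra")
   .options(table="testing123_copy", keyspace="test")
   .mode("append")
   .save())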

In [6]:
spark.stop()
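
The Cassandra driver connection opened in In [2] is still alive; it can be released the same way:

In [ ]:
# Close the driver's sessions and connection pools.
cluster.shutdown()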