Reading from and writing to Amazon Redshift
The following code examples use PySpark to read and write sample data from and to an Amazon Redshift database, first with the data source API and then with SparkSQL.
Use PySpark to read and write sample data from and to an Amazon Redshift database with the data source API.
import boto3
from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext.getOrCreate()  # reuse the existing SparkContext
sql_context = SQLContext(sc)

url = "jdbc:redshift:iam://redshifthost:5439/database"
aws_iam_role_arn = "arn:aws:iam::accountID:role/roleName"

# Read the Redshift table into a DataFrame, staging data through the S3 temp directory
df = sql_context.read \
    .format("io.github.spark_redshift_community.spark.redshift") \
    .option("url", url) \
    .option("dbtable", "tableName") \
    .option("tempdir", "s3://path/for/temp/data") \
    .option("aws_iam_role", aws_iam_role_arn) \
    .load()

# Write the DataFrame to a new Redshift table; "error" mode fails if the table already exists
df.write \
    .format("io.github.spark_redshift_community.spark.redshift") \
    .option("url", url) \
    .option("dbtable", "tableName_copy") \
    .option("tempdir", "s3://path/for/temp/data") \
    .option("aws_iam_role", aws_iam_role_arn) \
    .mode("error") \
    .save()
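For the SparkSQL approach mentioned above, a minimal sketch might look like the following. It assumes the same connector format, JDBC URL, IAM role, and S3 temp directory as the data source API example; the table name and the SparkSession setup are illustrative placeholders rather than values from this guide.

from pyspark.sql import SparkSession

# Reuse or create a SparkSession; Hive support lets SQL table definitions persist
spark = SparkSession.builder.enableHiveSupport().getOrCreate()

url = "jdbc:redshift:iam://redshifthost:5439/database"
aws_iam_role_arn = "arn:aws:iam::accountID:role/roleName"
temp_dir = "s3://path/for/temp/data"
table_name = "tableName"  # placeholder Redshift table name

# Register the Redshift table as a SQL table backed by the connector
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {table_name}
    USING io.github.spark_redshift_community.spark.redshift
    OPTIONS (
        dbtable '{table_name}',
        tempdir '{temp_dir}',
        url '{url}',
        aws_iam_role '{aws_iam_role_arn}'
    )
""")

# Query the registered table with SparkSQL and show a sample of the results
df = spark.sql(f"SELECT * FROM {table_name}")
df.show()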