Python Code
Python Code
import sys
# Build a GlueContext on top of the SparkContext returned by getOrCreate().
# NOTE(review): GlueContext and SparkContext are not imported anywhere in the
# visible lines -- confirm the imports exist in the full script.
glueContext = GlueContext(SparkContext.getOrCreate())
# Database name, table name and S3 output location.
# NOTE(review): none of these three values is used in the visible lines;
# presumably they feed read/write steps that are not part of this excerpt.
db_name = "Redshift"
tbl_name = "Sales_report"
output_dir = "s3://glue-sample-target/output-dir/sales_report"
# The `provider id` field will be a choice between long and string.
# Cast choices into integers; values that cannot be cast result in null.
# NOTE(review): the cast step described above is not visible here, and
# `sales_res` is never defined in this excerpt -- confirm against the full script.
sales_df = sales_res.toDF()
# SPDX-License-Identifier: MIT-0
import sys
# Build a GlueContext on top of the SparkContext returned by getOrCreate().
# NOTE(review): GlueContext and SparkContext are not imported anywhere in the
# visible lines -- confirm the imports exist in the full script.
glueContext = GlueContext(SparkContext.getOrCreate())
# Database and table names.
# NOTE(review): none of these names is used in the visible lines; presumably
# they are consumed by read steps that are not part of this excerpt.
db_name = "Redshift"
tbl_persons = "persons_json"
tbl_membership = "memberships_json"
tbl_organization = "organizations_json"
# S3 locations for the history output and for temporary data.
# NOTE(review): neither path is referenced in the visible lines -- verify usage.
output_history_dir = "s3://glue-sample-target/output-dir/sales_history"
redshift_temp_dir = "s3://glue-sample-target/temp-dir/"
# Convert l_history to a data frame and write it as Parquet to
# output_lg_partitioned_dir, partitioned by the `org_name` column.
# NOTE(review): `l_history` and `output_lg_partitioned_dir` are not defined in
# this excerpt -- confirm they are set earlier in the full script.
l_history.toDF().write.parquet(output_lg_partitioned_dir, partitionBy=['org_name'])
# NOTE(review): `dfc` and `df_name` are not defined in this excerpt; presumably
# `dfc` is a collection of frames selected by name -- confirm.
m_df = dfc.select(df_name)
1) Histogram:
import pandas as pd
import matplotlib.pyplot as plt
# Sample employee records; every row lines up with the column names
# passed to the DataFrame constructor below.
data = [
    ['E001', 'M', 34, 123, 'Normal', 350],
    ['E002', 'F', 40, 114, 'Overweight', 450],
    ['E003', 'F', 37, 135, 'Obesity', 169],
    ['E004', 'M', 30, 139, 'Underweight', 189],
    ['E005', 'F', 44, 117, 'Underweight', 183],
    ['E006', 'M', 36, 121, 'Normal', 80],
    ['E007', 'M', 32, 133, 'Obesity', 166],
    ['E008', 'F', 26, 140, 'Normal', 120],
    ['E009', 'M', 32, 133, 'Normal', 75],
    ['E010', 'M', 36, 133, 'Underweight', 40],
]
df = pd.DataFrame(
    data=data,
    columns=['EMPID', 'Gender', 'Age', 'Sales', 'BMI', 'Income'],
)

# Plot the histograms of the frame's columns, then display the figure.
df.hist()
plt.show()
Output:
2) Column Chart:
# Sample employee records; every row lines up with the column names
# passed to the DataFrame constructor below.
data = [
    ['E001', 'M', 34, 123, 'Normal', 350],
    ['E002', 'F', 40, 114, 'Overweight', 450],
    ['E003', 'F', 37, 135, 'Obesity', 169],
    ['E004', 'M', 30, 139, 'Underweight', 189],
    ['E005', 'F', 44, 117, 'Underweight', 183],
    ['E006', 'M', 36, 121, 'Normal', 80],
    ['E007', 'M', 32, 133, 'Obesity', 166],
    ['E008', 'F', 26, 140, 'Normal', 120],
    ['E009', 'M', 32, 133, 'Normal', 75],
    ['E010', 'M', 36, 133, 'Underweight', 40],
]
df = pd.DataFrame(
    data,
    columns=['EMPID', 'Gender', 'Age', 'Sales', 'BMI', 'Income'],
)

# Chart 1: pandas bar chart of the frame's columns, one group per row.
df.plot.bar()

# Chart 2: Sales against Age. Open a fresh figure first; otherwise plt.bar()
# and the axis labels below are applied to the axes df.plot.bar() just
# created, whose x positions are row indices rather than ages.
plt.figure()
plt.bar(df['Age'], df['Sales'])
plt.xlabel("Age")
plt.ylabel("Sales")

# Display both figures.
plt.show()
Output:
3) Scatter Plot:
import matplotlib.pyplot as plt
# Label the axes for the scatter plot.
# NOTE(review): no plotting call (e.g. plt.scatter) is visible in this
# excerpt, so as written nothing is drawn on the axes -- the data and the
# plotting line appear to be missing; confirm against the original example.
plt.xlabel("Time (years)")
plt.ylabel("Price (dollars)")
# Turn the grid on.
plt.grid(True)
# NOTE(review): legend() relies on labelled artists; none are created in the
# visible lines.
plt.legend()
plt.show()
Output: