import matplotlib
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline


# Read the FIFA rankings CSV; the "Date" column becomes the index and
# pandas is asked to parse that index as dates.
fifa_filepath = "fifa.csv"
fifa_data = pd.read_csv(
    fifa_filepath,
    parse_dates=True,
    index_col="Date",
)

# Report the (rows, columns) shape, then preview the first ten rows.
print(fifa_data.shape)
fifa_data.head(10)

(286, 6)


# Line chart showing how FIFA rankings evolved over time, drawn on a
# 16x6 figure. The title is set on the axes that lineplot returns.
plt.figure(figsize=(16, 6))
sns.lineplot(data=fifa_data).set_title('FIFA RANKINGS')

Text(0.5, 1.0, 'FIFA RANKINGS')


# Same ranking chart as before, restricted to the 'ARG' and 'BRA' columns.
plt.figure(figsize=(16, 6))
plt.title("FIFA RANKINGS")
ax = sns.lineplot(data=fifa_data[['ARG', 'BRA']])
# Label both axes in a single call on the returned axes.
ax.set(xlabel="Year", ylabel="Rank")

[Text(0.5, 0, 'Year'), Text(0, 0.5, 'Rank')]


# 14x7 figure for the heatmap.
plt.figure(figsize=(14, 7))

# Flight-delay table, indexed by its "Month" column.
flight_data = pd.read_csv('flight_delays.csv', index_col="Month")

# Title goes on the current axes before the heatmap is drawn onto them.
plt.title("Average Arrival Delay for Each Airline, by Month")

# Heatmap of the whole table; annot=True writes each value into its cell.
ax = sns.heatmap(data=flight_data, annot=True)

# Label both the horizontal and the vertical axis.
ax.set(xlabel="X- Axis Airline", ylabel="Month 2015")

[Text(0.5, 42.0, 'X- Axis Airline'), Text(105.0, 0.5, 'Month 2015')]


# Bar chart of the 'DL' column against the month index, on a 14x7 figure.
plt.figure(figsize=(14, 7))
sns.barplot(
    x=flight_data.index,
    y=flight_data.loc[:, 'DL'],
)

<AxesSubplot:xlabel='Month', ylabel='DL'>


# Load the insurance table and plot charges against BMI.
insurance_data = pd.read_csv('insurance.csv')
sns.scatterplot(data=insurance_data, x='bmi', y='charges')

<AxesSubplot:xlabel='bmi', ylabel='charges'>


# A scatter plot can carry a third variable by colour-coding its points:
# here each (bmi, charges) point is coloured by the 'smoker' column.
sns.scatterplot(
    data=insurance_data,
    x='bmi',
    y='charges',
    hue='smoker',
)

<AxesSubplot:xlabel='bmi', ylabel='charges'>


# Read the iris table with its "Id" column as the index.
iris_data = pd.read_csv(
    'iris.csv',
    index_col="Id",
)

# Report the (rows, columns) shape, then preview the first rows.
print(iris_data.shape)
iris_data.head()

(150, 5)


# Distribution of 'Petal Length', laid out along the y-axis.
# Passing x='Petal Length' instead would lay it out along the x-axis.
sns.displot(data=iris_data, y='Petal Length')

<seaborn.axisgrid.FacetGrid at 0x147f6c850>


# KDE plot: smoothed density estimate of 'Petal Length'.
# `fill=True` shades the area under the curve. It replaces the deprecated
# `shade=True`, which emits a FutureWarning on current seaborn releases and
# is scheduled to become an error.
sns.kdeplot(iris_data['Petal Length'], fill=True)

<AxesSubplot:xlabel='Petal Length', ylabel='Density'>


# 2D KDE plot: joint density of petal length and sepal width, drawn in green.
sns.jointplot(
    data=iris_data,
    x='Petal Length',
    y='Sepal Width (cm)',
    kind="kde",
    color='green',
)

<seaborn.axisgrid.JointGrid at 0x15400ead0>

	ARG	BRA	ESP	FRA	GER	ITA
Date
1993-08-08	5.0	8.0	13.0	12.0	1.0	2.0
1993-09-23	12.0	1.0	14.0	7.0	5.0	2.0
1993-10-22	9.0	1.0	7.0	14.0	4.0	3.0
1993-11-19	9.0	4.0	7.0	15.0	3.0	1.0
1993-12-23	8.0	3.0	5.0	15.0	1.0	2.0
1994-02-15	9.0	2.0	6.0	14.0	1.0	7.0
1994-03-15	8.0	2.0	6.0	15.0	1.0	11.0
1994-04-19	10.0	1.0	7.0	15.0	2.0	13.0
1994-05-17	6.0	1.0	9.0	17.0	2.0	16.0
1994-06-14	8.0	3.0	5.0	13.0	1.0	4.0

	Sepal Length	Sepal Width (cm)	Petal Length	Petal Width (cm)	Species
Id
1	5.1	3.5	1.4	0.2	Iris-setosa
2	4.9	3.0	1.4	0.2	Iris-setosa
3	4.7	3.2	1.3	0.2	Iris-setosa
4	4.6	3.1	1.5	0.2	Iris-setosa
5	5.0	3.6	1.4	0.2	Iris-setosa

Programming for Data Science

Visualization - seaborn