Programming for Data Science

Visualization - seaborn

Dr. Bhargavi R

SCOPE, VIT Chennai

In [1]:
import matplotlib
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline
In [2]:
# Read the FIFA rankings CSV, using the "Date" column as the index and
# asking pandas to parse that index as dates.
fifa_filepath = "fifa.csv"
fifa_data = pd.read_csv(
    fifa_filepath,
    parse_dates=True,
    index_col="Date",
)

# Report the (rows, columns) shape, then preview the first ten rows.
print(fifa_data.shape)
fifa_data.head(n=10)
(286, 6)
Out[2]:
ARG BRA ESP FRA GER ITA
Date
1993-08-08 5.0 8.0 13.0 12.0 1.0 2.0
1993-09-23 12.0 1.0 14.0 7.0 5.0 2.0
1993-10-22 9.0 1.0 7.0 14.0 4.0 3.0
1993-11-19 9.0 4.0 7.0 15.0 3.0 1.0
1993-12-23 8.0 3.0 5.0 15.0 1.0 2.0
1994-02-15 9.0 2.0 6.0 14.0 1.0 7.0
1994-03-15 8.0 2.0 6.0 15.0 1.0 11.0
1994-04-19 10.0 1.0 7.0 15.0 2.0 13.0
1994-05-17 6.0 1.0 9.0 17.0 2.0 16.0
1994-06-14 8.0 3.0 5.0 13.0 1.0 4.0
In [3]:
# Line chart of how the FIFA rankings evolved over time, on a wide figure.
plt.figure(figsize=(16, 6))
sns.lineplot(data=fifa_data)  # wide-form frame: one line per column
plt.title("FIFA RANKINGS")
Out[3]:
Text(0.5, 1.0, 'FIFA RANKINGS')
In [4]:
# Line chart of the rankings over time, restricted to the ARG and BRA columns.
plt.figure(figsize=(16, 6))
plt.title("FIFA RANKINGS")

# Select the two columns by label; seaborn draws one line per column.
# (A single series also works, e.g. data=fifa_data['BRA'] with label='BRA_rank'.)
ax = sns.lineplot(data=fifa_data[["ARG", "BRA"]])

# Label both axes in one call; plt.ylabel("Rank") is the pyplot equivalent
# for the y-axis.
ax.set(xlabel="Year", ylabel="Rank")
Out[4]:
[Text(0.5, 0, 'Year'), Text(0, 0.5, 'Rank')]
In [5]:
# Create a 14x7 figure for the heatmap.
plt.figure(figsize=(14, 7))

# Load the flight-delay table, using its "Month" column as the index.
# (Uncomment to inspect the first rows: print(flight_data.head()))
flight_data = pd.read_csv("flight_delays.csv", index_col="Month")

# Title for the current figure.
plt.title("Average Arrival Delay for Each Airline, by Month")

# Heatmap of the whole table; annot=True writes each cell's value in the cell.
ax = sns.heatmap(flight_data, annot=True)

# Label the horizontal and vertical axes.
ax.set(xlabel="X- Axis Airline", ylabel="Month 2015")
Out[5]:
[Text(0.5, 42.0, 'X- Axis Airline'), Text(105.0, 0.5, 'Month 2015')]
In [6]:
# Bar chart of the "DL" column against the frame's index (the Month column).
plt.figure(figsize=(14, 7))
sns.barplot(
    x=flight_data.index,
    y=flight_data["DL"],
)
Out[6]:
<AxesSubplot:xlabel='Month', ylabel='DL'>
In [7]:
# Load the insurance data set and scatter "charges" against "bmi".
insurance_data = pd.read_csv("insurance.csv")
sns.scatterplot(data=insurance_data, x="bmi", y="charges")
Out[7]:
<AxesSubplot:xlabel='bmi', ylabel='charges'>
In [8]:
# A scatter plot can show the relationship between three variables:
# here the third one ("smoker") is encoded as the colour of each point.
sns.scatterplot(
    data=insurance_data,
    x="bmi",
    y="charges",
    hue="smoker",
)
Out[8]:
<AxesSubplot:xlabel='bmi', ylabel='charges'>
In [9]:
# Load the iris data set, using its "Id" column as the index.
iris_data = pd.read_csv("iris.csv", index_col="Id")

# Print the (rows, columns) shape, then preview the first five rows.
print(iris_data.shape)
iris_data.head(5)
(150, 5)
Out[9]:
Sepal Length Sepal Width (cm) Petal Length Petal Width (cm) Species
Id
1 5.1 3.5 1.4 0.2 Iris-setosa
2 4.9 3.0 1.4 0.2 Iris-setosa
3 4.7 3.2 1.3 0.2 Iris-setosa
4 4.6 3.1 1.5 0.2 Iris-setosa
5 5.0 3.6 1.4 0.2 Iris-setosa
In [12]:
# Histogram of petal length with the variable on the y-axis (horizontal bars).
# Passing x='Petal Length' instead puts the variable on the x-axis.
# displot is used here in place of the older
# sns.distplot(a=..., kde=False, vertical=True) call.
sns.displot(data=iris_data, y="Petal Length")
Out[12]:
<seaborn.axisgrid.FacetGrid at 0x147f6c850>
In [13]:
# KDE plot: kernel density estimate of petal length, with the area under the
# curve filled in.
# `fill=True` replaces the deprecated `shade=True` keyword, which warns on
# recent seaborn releases and is scheduled for removal. The column is passed
# by keyword (data= / x=) so the call does not depend on which parameter
# comes first positionally in the installed seaborn version.
sns.kdeplot(data=iris_data, x="Petal Length", fill=True)
Out[13]:
<AxesSubplot:xlabel='Petal Length', ylabel='Density'>
In [14]:
# 2D KDE plot: joint density of petal length (x) and sepal width (y),
# drawn in green on a JointGrid.
sns.jointplot(
    data=iris_data,
    x="Petal Length",
    y="Sepal Width (cm)",
    kind="kde",
    color="green",
)
Out[14]:
<seaborn.axisgrid.JointGrid at 0x15400ead0>
In [ ]: