import matplotlib
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Read the FIFA rankings CSV; the "Date" column becomes a parsed datetime index
fifa_filepath = "fifa.csv"
fifa_data = pd.read_csv(
    fifa_filepath,
    parse_dates=True,
    index_col="Date",
)
# Report the (rows, columns) shape, then preview the first ten rows
print(fifa_data.shape)
fifa_data.head(n=10)
(286, 6)
| Date | ARG | BRA | ESP | FRA | GER | ITA |
|---|---|---|---|---|---|---|
| 1993-08-08 | 5.0 | 8.0 | 13.0 | 12.0 | 1.0 | 2.0 |
| 1993-09-23 | 12.0 | 1.0 | 14.0 | 7.0 | 5.0 | 2.0 |
| 1993-10-22 | 9.0 | 1.0 | 7.0 | 14.0 | 4.0 | 3.0 |
| 1993-11-19 | 9.0 | 4.0 | 7.0 | 15.0 | 3.0 | 1.0 |
| 1993-12-23 | 8.0 | 3.0 | 5.0 | 15.0 | 1.0 | 2.0 |
| 1994-02-15 | 9.0 | 2.0 | 6.0 | 14.0 | 1.0 | 7.0 |
| 1994-03-15 | 8.0 | 2.0 | 6.0 | 15.0 | 1.0 | 11.0 |
| 1994-04-19 | 10.0 | 1.0 | 7.0 | 15.0 | 2.0 | 13.0 |
| 1994-05-17 | 6.0 | 1.0 | 9.0 | 17.0 | 2.0 | 16.0 |
| 1994-06-14 | 8.0 | 3.0 | 5.0 | 13.0 | 1.0 | 4.0 |
# Wide 16x6 canvas so the date axis stays readable
plt.figure(figsize=(16, 6))
# Passing the whole frame draws one line per column against the Date index
sns.lineplot(data=fifa_data)
plt.title("FIFA RANKINGS")
Text(0.5, 1.0, 'FIFA RANKINGS')
# Same chart as before, restricted to the ARG and BRA columns
plt.figure(figsize=(16, 6))
plt.title("FIFA RANKINGS")
ax = sns.lineplot(data=fifa_data[['ARG', 'BRA']])
# Label both axes in one call on the returned Axes
# (equivalent to separate plt.xlabel / plt.ylabel calls)
ax.set(xlabel="Year", ylabel="Rank")
[Text(0.5, 0, 'Year'), Text(0, 0.5, 'Rank')]
# Open a 14x7 figure for the heatmap
plt.figure(figsize=(14, 7))
# Flight-delay table, indexed by its "Month" column
flight_data = pd.read_csv('flight_delays.csv', index_col="Month")
plt.title("Average Arrival Delay for Each Airline, by Month")
# annot=True writes each cell's numeric value on top of its colour
ax = sns.heatmap(data=flight_data, annot=True)
# Label both the horizontal and the vertical axis
ax.set(xlabel="X- Axis Airline", ylabel="Month 2015")
[Text(0.5, 42.0, 'X- Axis Airline'), Text(105.0, 0.5, 'Month 2015')]
# Bar chart of the 'DL' column, one bar per entry of the Month index
plt.figure(figsize=(14, 7))
sns.barplot(
    x=flight_data.index,
    y=flight_data['DL'],
)
<AxesSubplot:xlabel='Month', ylabel='DL'>
# Load the insurance table and plot charges against BMI
insurance_data = pd.read_csv('insurance.csv')
sns.scatterplot(data=insurance_data, x='bmi', y='charges')
<AxesSubplot:xlabel='bmi', ylabel='charges'>
# A scatter plot can encode a third variable by colouring the points:
# here each point's colour comes from the 'smoker' column.
sns.scatterplot(
    data=insurance_data,
    x='bmi',
    y='charges',
    hue='smoker',
)
<AxesSubplot:xlabel='bmi', ylabel='charges'>
# Load the iris measurements, using the "Id" column as the index
iris_data = pd.read_csv(
    'iris.csv',
    index_col="Id",
)
# Show the (rows, columns) shape followed by the first five rows
print(iris_data.shape)
iris_data.head(n=5)
(150, 5)
| Id | Sepal Length | Sepal Width (cm) | Petal Length | Petal Width (cm) | Species |
|---|---|---|---|---|---|
| 1 | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
| 2 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
| 3 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
| 4 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
| 5 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
# Histogram of petal length via the figure-level displot API.
# Assigning the column to y= puts the measured values on the vertical axis;
# use x='Petal Length' instead for the conventional orientation.
sns.displot(data=iris_data, y='Petal Length')
<seaborn.axisgrid.FacetGrid at 0x147f6c850>
# KDE plot: smoothed density estimate of the petal-length distribution.
# fill=True shades the area under the curve. It replaces the `shade`
# keyword, which seaborn deprecated in 0.11 and later removed.
sns.kdeplot(iris_data['Petal Length'], fill=True)
<AxesSubplot:xlabel='Petal Length', ylabel='Density'>
# 2D KDE: joint density of petal length vs. sepal width, with the
# marginal densities of each variable drawn along the axes
sns.jointplot(
    data=iris_data,
    x='Petal Length',
    y='Sepal Width (cm)',
    kind="kde",
    color='green',
)
<seaborn.axisgrid.JointGrid at 0x15400ead0>