import numpy as np
import pandas as pd
df3 = pd.DataFrame({
'Place': ['AMR', 'BHP', 'CHN', 'VELLORE'],
'State' : ['AP', 'MP', 'TN','TN']
})
df3
| Place | State | |
|---|---|---|
| 0 | AMR | AP |
| 1 | BHP | MP |
| 2 | CHN | TN |
| 3 | VELLORE | TN |
df4 = pd.DataFrame({
'Place': ['AMR', 'BHP', 'CHN', 'VELLORE'],
'Students' : [3000, 4000, 10000, 50000]
})
df4
| Place | Students | |
|---|---|---|
| 0 | AMR | 3000 |
| 1 | BHP | 4000 |
| 2 | CHN | 10000 |
| 3 | VELLORE | 50000 |
df_merge1 = pd.merge(df3, df4)
df_merge1
| Place | State | Students | |
|---|---|---|---|
| 0 | AMR | AP | 3000 |
| 1 | BHP | MP | 4000 |
| 2 | CHN | TN | 10000 |
| 3 | VELLORE | TN | 50000 |
df5 = pd.DataFrame({
'State': ['AP', 'MP', 'TN'],
'NumberOfCampuses' : [1, 1, 2]
})
df5
| State | NumberOfCampuses | |
|---|---|---|
| 0 | AP | 1 |
| 1 | MP | 1 |
| 2 | TN | 2 |
df_merge2 = pd.merge(df3, df5)
df_merge2
| Place | State | NumberOfCampuses | |
|---|---|---|---|
| 0 | AMR | AP | 1 |
| 1 | BHP | MP | 1 |
| 2 | CHN | TN | 2 |
| 3 | VELLORE | TN | 2 |
df6 = pd.DataFrame({
'employee': ['Bob', 'Jake', 'Lisa', 'Sue'],
'group': ['Accounting', 'Engineering', 'Engineering', 'HR']
})
df6
| employee | group | |
|---|---|---|
| 0 | Bob | Accounting |
| 1 | Jake | Engineering |
| 2 | Lisa | Engineering |
| 3 | Sue | HR |
df7 = pd.DataFrame({'group': ['Accounting', 'Accounting',
'Engineering', 'Engineering',
'HR', 'HR'],
'skills': ['math', 'spreadsheets',
'coding', 'linux',
'spreadsheets', 'organization']})
df7
| group | skills | |
|---|---|---|
| 0 | Accounting | math |
| 1 | Accounting | spreadsheets |
| 2 | Engineering | coding |
| 3 | Engineering | linux |
| 4 | HR | spreadsheets |
| 5 | HR | organization |
df_merge3 = pd.merge(df6, df7)
df_merge3
| employee | group | skills | |
|---|---|---|---|
| 0 | Bob | Accounting | math |
| 1 | Bob | Accounting | spreadsheets |
| 2 | Jake | Engineering | coding |
| 3 | Jake | Engineering | linux |
| 4 | Lisa | Engineering | coding |
| 5 | Lisa | Engineering | linux |
| 6 | Sue | HR | spreadsheets |
| 7 | Sue | HR | organization |
df_concat4 = pd.concat([df3, df4], axis = 1)
df_concat4
| Place | State | Place | Students | |
|---|---|---|---|---|
| 0 | AMR | AP | AMR | 3000 |
| 1 | BHP | MP | BHP | 4000 |
| 2 | CHN | TN | CHN | 10000 |
| 3 | VELLORE | TN | VELLORE | 50000 |
# Change the name of 'Place' to 'City'
df4 = df4.rename(columns = {'Place' : 'City'})
df4
| City | Students | |
|---|---|---|
| 0 | AMR | 3000 |
| 1 | BHP | 4000 |
| 2 | CHN | 10000 |
| 3 | VELLORE | 50000 |
# use left_on and right_on when you may wish to merge two datasets with different column names
print(pd.merge(df3, df4, left_on = "Place", right_on = 'City'))
Place State City Students 0 AMR AP AMR 3000 1 BHP MP BHP 4000 2 CHN TN CHN 10000 3 VELLORE TN VELLORE 50000
# merge on an index can be used when we want to merge based on index of the dataframes
df1 = df3.set_index('Place')
df2 = df4.set_index('City')
print(df1)
print(df2)
State
Place
AMR AP
BHP MP
CHN TN
VELLORE TN
Students
City
AMR 3000
BHP 4000
CHN 10000
VELLORE 50000
pd.merge(df1, df2, left_index = True, right_index = True)
| State | Students | |
|---|---|---|
| Place | ||
| AMR | AP | 3000 |
| BHP | MP | 4000 |
| CHN | TN | 10000 |
| VELLORE | TN | 50000 |
left_index with right_on or left_on with right_indexjoin() method performs a merge on indicesprint(df1.join(df2))
State Students Place AMR AP 3000 BHP MP 4000 CHN TN 10000 VELLORE TN 50000
df8 = pd.DataFrame({
'IIT': ['CHN', 'MUM','DEL', 'HYD'],
'State': ['TN', 'MH', 'DEL', 'TEL']
})
df8
| IIT | State | |
|---|---|---|
| 0 | CHN | TN |
| 1 | MUM | MH |
| 2 | DEL | DEL |
| 3 | HYD | TEL |
pd.merge(df3, df8, how = 'inner')
| Place | State | IIT | |
|---|---|---|---|
| 0 | CHN | TN | CHN |
| 1 | VELLORE | TN | CHN |
pd.merge(df3, df8, how = 'outer')
| Place | State | IIT | |
|---|---|---|---|
| 0 | AMR | AP | NaN |
| 1 | BHP | MP | NaN |
| 2 | CHN | TN | CHN |
| 3 | VELLORE | TN | CHN |
| 4 | NaN | MH | MUM |
| 5 | NaN | DEL | DEL |
| 6 | NaN | TEL | HYD |
pd.merge(df3, df8, how = 'inner', left_on = 'Place', right_on = 'IIT')
| Place | State_x | IIT | State_y | |
|---|---|---|---|---|
| 0 | CHN | TN | CHN | TN |