pd.concat(objs, axis=0, join='outer', join_axes=None,
        ignore_index=False, keys=None, 
        levels=None, names=None,
        verify_integrity=False, sort=None, copy=True)


import numpy as np
import pandas as pd


df1 = pd.Series(['AMR', 'BHP', 'CHN', 'VEL', 'VEL'], name = 'Place' )
df1

0    AMR
1    BHP
2    CHN
3    VEL
4    VEL
Name: Place, dtype: object


df2 = pd.Series(['AP', 'MP', 'TN', 'TN', 'TN'], name = 'state')
df2

0    AP
1    MP
2    TN
3    TN
4    TN
Name: state, dtype: object


df_concat1 = pd.concat([df1, df2])    # Note that default join operation 
                                      # is 'outer' join
df_concat1

0    AMR
1    BHP
2    CHN
3    VEL
4    VEL
0     AP
1     MP
2     TN
3     TN
4     TN
dtype: object


# Use verify_integrity to check for duplicates
df_concat1 = pd.concat([df1, df2], verify_integrity = True)  
df_concat1

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [5], in <cell line: 2>()
      1 # Use verify_integrity to check for duplicates
----> 2 df_concat1 = pd.concat([df1, df2], verify_integrity = True)  
      3 df_concat1

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/util/_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    305 if len(args) > num_allow_args:
    306     warnings.warn(
    307         msg.format(arguments=arguments),
    308         FutureWarning,
    309         stacklevel=stacklevel,
    310     )
--> 311 return func(*args, **kwargs)

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/reshape/concat.py:347, in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
    143 @deprecate_nonkeyword_arguments(version=None, allowed_args=["objs"])
    144 def concat(
    145     objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame],
   (...)
    154     copy: bool = True,
    155 ) -> DataFrame | Series:
    156     """
    157     Concatenate pandas objects along a particular axis with optional set logic
    158     along the other axes.
   (...)
    345     ValueError: Indexes have overlapping values: ['a']
    346     """
--> 347     op = _Concatenator(
    348         objs,
    349         axis=axis,
    350         ignore_index=ignore_index,
    351         join=join,
    352         keys=keys,
    353         levels=levels,
    354         names=names,
    355         verify_integrity=verify_integrity,
    356         copy=copy,
    357         sort=sort,
    358     )
    360     return op.get_result()

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/reshape/concat.py:542, in _Concatenator.__init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
    539 self.verify_integrity = verify_integrity
    540 self.copy = copy
--> 542 self.new_axes = self._get_new_axes()

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/reshape/concat.py:612, in _Concatenator._get_new_axes(self)
    610 def _get_new_axes(self) -> list[Index]:
    611     ndim = self._get_result_dim()
--> 612     return [
    613         self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
    614         for i in range(ndim)
    615     ]

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/reshape/concat.py:613, in <listcomp>(.0)
    610 def _get_new_axes(self) -> list[Index]:
    611     ndim = self._get_result_dim()
    612     return [
--> 613         self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
    614         for i in range(ndim)
    615     ]

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/_libs/properties.pyx:37, in pandas._libs.properties.CachedProperty.__get__()

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/reshape/concat.py:674, in _Concatenator._get_concat_axis(self)
    669 else:
    670     concat_axis = _make_concat_multiindex(
    671         indexes, self.keys, self.levels, self.names
    672     )
--> 674 self._maybe_check_integrity(concat_axis)
    676 return concat_axis

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pandas/core/reshape/concat.py:682, in _Concatenator._maybe_check_integrity(self, concat_index)
    680 if not concat_index.is_unique:
    681     overlap = concat_index[concat_index.duplicated()].unique()
--> 682     raise ValueError(f"Indexes have overlapping values: {overlap}")

ValueError: Indexes have overlapping values: Int64Index([0, 1, 2, 3, 4], dtype='int64')


# Create a new index ignoring the original index
df_concat1 = pd.concat([df1, df2], ignore_index = True ) 
df_concat1

0    AMR
1    BHP
2    CHN
3    VEL
4    VEL
5     AP
6     MP
7     TN
8     TN
9     TN
dtype: object


df_concat2 = pd.concat([df1,df2], axis = 1)
df_concat2


df3 = pd.DataFrame({
    'Place': ['AMR', 'BHP', 'CHN', 'VEL'],
    'State' : ['AP', 'MP', 'TN','TN']
})
df3


df4 = pd.DataFrame({
    'Place': ['AMR', 'BHP', 'CHN', 'VEL'],
    'Students' : [3000, 4000, 10000, 50000]
})
df4


df_concat3 = pd.concat([df3, df4])
df_concat3


df_concat3 = pd.concat([df3, df4], join = 'inner')
df_concat3


df_concat4 = pd.concat([df3, df4], keys = ['df3', 'df4'],
                       names = ['Dataset', 'ind'])

print(df_concat4)

            Place State  Students
Dataset ind                      
df3     0     AMR    AP       NaN
        1     BHP    MP       NaN
        2     CHN    TN       NaN
        3     VEL    TN       NaN
df4     0     AMR   NaN    3000.0
        1     BHP   NaN    4000.0
        2     CHN   NaN   10000.0
        3     VEL   NaN   50000.0


df_concat4.loc[('df4',2)]

Place           CHN
State           NaN
Students    10000.0
Name: (df4, 2), dtype: object


df_concat4.loc[('df4',2), 'Students']

10000.0


df_concat4.loc['df4']     # Partial indexing


df_concat4.loc[('df3',2):('df4',2)]


df_concat4.loc[('df3',2):('df4',2), 'Place': 'Students']


# Slicing using IndexSlice
idx = pd.IndexSlice
df_concat4.loc[idx[('df3',2) : ('df4', 2)] ,  idx['Place': 'State']]


# Multi level index - slicing with iloc
idx = pd.IndexSlice

df_concat4.iloc[idx[1:4, 1]]

Dataset  ind
df3      1      MP
         2      TN
         3      TN
Name: State, dtype: object


df_concat4 = pd.concat([df3, df4], axis = 1)
df_concat4

Programming for Data Science¶

Pandas - Concat And Append¶

Introduction¶

The concat() method¶

	Place	State	Students
0	AMR	AP	NaN
1	BHP	MP	NaN
2	CHN	TN	NaN
3	VEL	TN	NaN
0	AMR	NaN	3000.0
1	BHP	NaN	4000.0
2	CHN	NaN	10000.0
3	VEL	NaN	50000.0

		Place	State	Students
Dataset	ind
df3	2	CHN	TN	NaN
df3	3	VEL	TN	NaN
df4	0	AMR	NaN	3000.0
	1	BHP	NaN	4000.0
	2	CHN	NaN	10000.0