🌍 Notebook at a glance
In this notebook, we delve deep into the world of inequality through the lens of data visualization. Our primary aim is to derive insights and portray the trends effectively to our audience.
This Jupyter Notebook examines global inequality using three key indicators: Gross Domestic Product (GDP), Human Development Index (HDI), and the Gini Index. These metrics help assess economic performance, human development, and income inequality across different countries. The analysis includes data processing, visualization, and interpretation of these indicators to understand global disparities.
Through data visualization and comparative analysis, this notebook aims to provide insights into global economic and social inequalities.
🛠️ Install packages
!python -m pip install --upgrade pip -q
!pip install geopandas -q
### 📊 Bubble Plot: GDP Per Capita vs. Gini Index
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Bubble plot of mean GDP per capita against median Gini Index per country.
# Reads two Excel workbooks, aggregates the years 2013-2022 for each
# country, merges the two aggregates on the ISO-3 country code and plots them.

# Define file paths
gini_file_path = r"C:\Users\11\Desktop\Python\Python Project\Global Inequlity Analysis\Gini Index.xlsx"
gdp_file_path = r"C:\Users\11\Desktop\Python\Python Project\Global Inequlity Analysis\NationalGDP.xls"

# Years covered by the analysis (2013-2022 inclusive), defined once so the
# Gini and GDP selections cannot drift apart.
years = list(range(2013, 2023))

# Read the Excel files
df = pd.read_excel(gini_file_path, engine="openpyxl")
GDP = pd.read_excel(gdp_file_path, engine="xlrd")

# Clean column names by stripping spaces and replacing inner spaces with underscores
GDP.columns = GDP.columns.str.strip().str.replace(r'\s+', '_', regex=True)
df.columns = df.columns.str.strip().str.replace(r'\s+', '_', regex=True)

# Pivot the Gini Index table so that each country's yearly values become columns
pivot_df = df.pivot_table(values="Gini_Index", index=['Country', 'ISO-3_Code'], columns="Year")

# Select the years 2013 to 2022.
# The pivoted Gini table is indexed by integer year labels, while the GDP
# sheet is indexed by year labels stored as strings.
IEQ_10 = pivot_df[years]
GDP_10 = GDP[['Country_Code'] + [str(year) for year in years]]

# Compute the median Gini Index (IEQ value) for each country
IEQ_10_Median = IEQ_10.median(axis=1).reset_index()

# Compute the mean GDP across the selected years for each country
GDP_10_Mean = GDP_10.set_index('Country_Code').mean(axis=1).reset_index()

# Rename ISO-3_Code to Country_Code in the IEQ dataframe for merging
IEQ_10_Median.rename(columns={"ISO-3_Code": "Country_Code"}, inplace=True)

# Merge the two datasets on Country_Code; the suffixes tell the two
# aggregated value columns apart.
Co_IEQ_GDP = IEQ_10_Median.merge(GDP_10_Mean, on='Country_Code', suffixes=('_ieq', '_gdp'))

# For clarity, make sure the two numeric columns (positions 2 and 3 of the
# merged frame) are named '0_ieq' and '0_gdp'.
Co_IEQ_GDP.rename(columns={Co_IEQ_GDP.columns[2]: '0_ieq', Co_IEQ_GDP.columns[3]: '0_gdp'}, inplace=True)
print(Co_IEQ_GDP.head())

# Drop rows with non-finite values in '0_gdp' or '0_ieq'
Co_IEQ_GDP = Co_IEQ_GDP[np.isfinite(Co_IEQ_GDP['0_gdp']) & np.isfinite(Co_IEQ_GDP['0_ieq'])]

# Sort the DataFrame by '0_gdp' (mean GDP) in ascending order
Co_IEQ_GDP = Co_IEQ_GDP.sort_values(by='0_gdp', ascending=True)

# Set up the bubble plot figure.
# Only one figure is created: an extra plt.figure() call before this one
# would stay empty and be shown as "<Figure ... with 0 Axes>".
sns.set_theme(style="whitegrid", palette="muted", font="sans-serif", font_scale=1.3)
plt.figure(figsize=(12, 8), dpi=300)

# Create the bubble plot:
scatter = sns.scatterplot(
    data=Co_IEQ_GDP,
    x='0_gdp',          # x-axis: mean GDP
    y='0_ieq',          # y-axis: median Gini Index
    size='0_gdp',       # Bubble size based on mean GDP (you can change this if desired)
    sizes=(50, 1000),   # Adjust bubble size range
    alpha=0.7,          # Transparency for bubbles
    hue='Country',
    legend=False,
)

# Annotate each bubble with the Country_Code.
# Non-finite rows were already removed above, so every row can be labelled.
for gdp_value, gini_value, country_code in zip(
    Co_IEQ_GDP['0_gdp'], Co_IEQ_GDP['0_ieq'], Co_IEQ_GDP['Country_Code']
):
    scatter.text(gdp_value, gini_value, country_code,
                 fontsize=9, ha='center', va='center')

# Logarithmic x-axis
scatter.set_xscale('log')

# Customize the plot
scatter.set_title("Bubble Plot: GDP Per Capita vs. Gini Index", fontsize=16)
scatter.set_xlabel("GDP Per Capita(2013-2022)", fontsize=14)
scatter.set_ylabel("Gini Index (2013-2022)", fontsize=14)
scatter.grid(True)

plt.tight_layout()
# Display the plot
plt.show()
Country Country_Code 0_ieq 0_gdp
0 Albania ALB 32.8 5105.599340
1 Algeria DZA NaN 4765.032468
2 Angola AGO 51.3 2855.680197
3 Argentina ARG 41.8 12102.698302
4 Armenia ARM 31.5 4275.722679
<Figure size 1200x800 with 0 Axes>
### 📊 Choropleth map by income inequality (median Gini Index, 2013–2022)
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
# Choropleth map of the median Gini Index (2013-2022) per country.

# Load world map data
world = gpd.read_file(r"C:\Users\11\Desktop\Python\Python Project\Global Inequlity Analysis\110m_cultural\ne_110m_admin_0_countries.shp")

# Load GINI Index data
GINI = pd.read_excel(r"C:\Users\11\Desktop\Python\Python Project\Global Inequlity Analysis\Gini Index.xlsx")

# Strip spaces from column names
GINI.columns = GINI.columns.str.strip()

# Ensure 'Year' is numeric (unparseable values become NaN and are excluded
# by the year filter below)
GINI['Year'] = pd.to_numeric(GINI['Year'], errors='coerce')

# Filter data for years 2013-2022 and calculate the median per country
GINI_filtered = GINI[(GINI['Year'] >= 2013) & (GINI['Year'] <= 2022)]
Pivot_GINI = GINI_filtered.pivot_table(values="Gini Index", index=['Country', 'ISO-3 Code'], aggfunc='median')

# Reset index so 'ISO-3 Code' is no longer part of the index
Pivot_GINI.reset_index(inplace=True)

# Rename median column
GINI_median = Pivot_GINI.rename(columns={"Gini Index": 'GINI_Median'})

# Merging datasets: left join of the map onto the Gini medians by country code.
# NOTE(review): the join key is the shapefile's 'SOV_A3' column. Confirm that
# its codes match the ISO-3 codes in the Gini workbook for every country;
# rows that do not match get NaN and are dropped by the filter below.
merged_gini = world.set_index('SOV_A3').join(GINI_median.set_index('ISO-3 Code'))

# Filter out invalid values. NaN > 0 is False, so countries without a
# matching Gini value are removed here as well and are not drawn.
filtered_merged_gini = merged_gini[merged_gini['GINI_Median'] > 0]

# Plot the GINI Index map
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
filtered_merged_gini.plot(
    column='GINI_Median',
    ax=ax,
    legend=True,
    cmap="RdYlGn_r",
    legend_kwds={'label': "Median GINI Index (2013-2022) by Country", 'orientation': "horizontal"},
)

plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
# Prepare the median Gini Index (2013-2022) per country and pick the 30
# highest and the 30 lowest values for the lollipop charts.

# Load GINI data
GINI = pd.read_excel(r"C:\Users\11\Desktop\Python\Python Project\Global Inequlity Analysis\Gini Index.xlsx")

# Strip spaces from column names
GINI.columns = GINI.columns.str.strip()

# Ensure 'Year' is numeric (unparseable values become NaN and are excluded
# by the year filter below)
GINI['Year'] = pd.to_numeric(GINI['Year'], errors='coerce')

# Filter data for years 2013-2022 and calculate the median per country
GINI_filtered = GINI[(GINI['Year'] >= 2013) & (GINI['Year'] <= 2022)]
Pivot_GINI = GINI_filtered.pivot_table(values="Gini Index", index=['Country', 'ISO-3 Code'], aggfunc='median')

# Reset index so 'ISO-3 Code' is no longer part of the index
Pivot_GINI.reset_index(inplace=True)
print(Pivot_GINI.head())

# Sort descending, so head() holds the 30 highest medians and tail() the
# 30 lowest.
sorted_GINI = Pivot_GINI.sort_values('Gini Index', ascending=False)
top_30_GINI = sorted_GINI.head(30)
bottom_30_GINI = sorted_GINI.tail(30)
def highlight_top3(rank):
    """Return the marker colour for a 1-based rank.

    Ranks 1, 2 and 3 map to 'gold', 'silver' and 'brown'; every other
    rank maps to 'skyblue'.
    """
    podium_colors = {1: 'gold', 2: 'silver', 3: 'brown'}
    return podium_colors.get(rank, 'skyblue')
# Function to create lollipop chart
def lollipop_chart(data, title):
    """Draw and show a vertical lollipop chart of Gini Index by country.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'Country' and 'Gini Index'.
    title : str
        Title placed above the chart.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Sort data: highest Gini Index first, so rank 1 is the largest value
    sorted_data = data.sort_values('Gini Index', ascending=False)

    # One colour per row in sorted order; the three largest values are highlighted
    head_colors = [highlight_top3(rank) for rank in range(1, len(sorted_data) + 1)]

    # Create base figure and axis
    fig, ax = plt.subplots(figsize=(12, 12))

    # Lollipop lines
    ax.vlines(x=sorted_data['Country'], ymin=0, ymax=sorted_data['Gini Index'], color='gray', alpha=0.6)

    # Lollipop heads
    ax.scatter(sorted_data['Country'], sorted_data['Gini Index'], color=head_colors, s=75, alpha=0.6)

    # Title & grid
    ax.set_title(title, fontdict={'size': 15})
    ax.grid(linestyle='--', alpha=0.6)
    ax.set_xlabel('Country')
    ax.set_ylabel('Gini Index (Median 2013-2022)')
    plt.xticks(rotation=90)

    # Display
    # Reverse the x-axis: the rows were added in descending order, so after
    # the inversion values increase from left to right and the highlighted
    # (largest) heads end up on the right-hand side.
    # NOTE(review): confirm this is the intended side for the highlighted ranks.
    ax.invert_xaxis()
    plt.show()
# Create lollipop charts for the 30 lowest and the 30 highest median values.
# bottom_30_GINI is the tail of the descending sort (lowest values);
# top_30_GINI is its head (highest values), so its title says "highest".
lollipop_chart(bottom_30_GINI, 'Top 30 Countries that have the lowest Gini Index (Median 2013-2022)')
lollipop_chart(top_30_GINI, 'Top 30 Countries that have the highest Gini Index (Median 2013-2022)')
Country ISO-3 Code Gini Index
0 Albania ALB 32.8
1 Angola AGO 51.3
2 Argentina ARG 41.8
3 Armenia ARM 31.5
4 Australia AUS 34.3
📊 Choropleth map by Human Development Index (HDI), 2022
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
# Choropleth map of the 2022 Human Development Index (HDI) value per country.

# Load world map data
world = gpd.read_file(r"C:\Users\11\Desktop\Python\Python Project\Global Inequlity Analysis\110m_cultural\ne_110m_admin_0_countries.shp")

# Load HDI data
HDI = pd.read_excel(r"C:\Users\11\Desktop\Python\Python Project\Global Inequlity Analysis\Human development index (HDI).xlsx")

# Strip spaces from column names
HDI.columns = HDI.columns.str.strip()

# Ensure 'Year' is numeric (unparseable values become NaN)
HDI['Year'] = pd.to_numeric(HDI['Year'], errors='coerce')

# Pivot Table: one row per country, one column per year
Pivot_HDI = HDI.pivot_table(values="Human development index (HDI)", index=['Country', 'ISO-3 Code'], columns="Year")

# Reset index so 'ISO-3 Code' is no longer part of the index
Pivot_HDI.reset_index(inplace=True)

# Extract HDI for 2022, failing early if the workbook has no 2022 column
if 2022 not in Pivot_HDI.columns:
    raise ValueError("HDI data for 2022 is not available in the dataset.")
HDI_2022 = Pivot_HDI[['ISO-3 Code', 2022]].rename(columns={2022: 'HDI_2022'})

# Merging datasets: left join of the map onto the HDI values by country code.
# NOTE(review): the join key is the shapefile's 'SOV_A3' column. Confirm that
# its codes match the ISO-3 codes in the HDI workbook for every country;
# rows that do not match get NaN and are dropped by the filter below.
merged = world.set_index('SOV_A3').join(HDI_2022.set_index('ISO-3 Code'))

# Keep only countries with a positive HDI value. NaN > 0 is False, so
# countries without a matching HDI value are removed as well and are not drawn.
filtered_merged = merged[merged['HDI_2022'] > 0]

# Plot the HDI map. The plotted column is the HDI value itself (not a rank),
# so the legend is labelled accordingly.
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
filtered_merged.plot(
    column='HDI_2022',
    ax=ax,
    legend=True,
    cmap="RdYlGn",
    legend_kwds={'label': "Human Development Index (2022) by Country", 'orientation': "horizontal"},
)
plt.show()