# import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colorbar
import pandas as pd
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

%matplotlib notebook

# Load the World Happiness Report panel from GitHub.
url = 'https://raw.githubusercontent.com/npanczyk/npanczyk.github.io/main/happiness.csv'
df = pd.read_csv(url)

# Rows in ascending year order, both as a DataFrame and as a plain 2-D array.
# The array is addressed by position below: column 0 is used as the country,
# column 1 as the year and column 5 as the life-expectancy value.
sorted_df = df.sort_values(by='year')
sorted_arr = sorted_df.values
ordered_years = sorted_arr[:, 1]

# Years covered by the plots (2005 through 2020 inclusive).
year_list = np.arange(2005, 2021, 1)

# l_list[k] is the row index one past the last row of year_list[k] in
# sorted_arr, so the rows of year_list[k] are
# sorted_arr[l_list[k - 1]:l_list[k]] (starting from 0 for the first year).
# searchsorted computes every boundary in one pass and, unlike a running
# per-row counter, still yields a correct (empty) slice for a year that has
# no rows at all.
l_list = np.searchsorted(sorted_df['year'].to_numpy(), year_list, side='right')

# Rows belonging to the earliest year (2005).
l = int(l_list[0])
early_data = sorted_arr[0:l, :]

# Same table in descending year order, used to pick out the latest year.
sorted2_df = df.sort_values(by='year', ascending=False)
sorted2_arr = sorted2_df.values
reverse_years = sorted2_arr[:, 1]

# Select the 2020 rows by value rather than by counting a leading run, so the
# result stays correct even if the file ever contains years after 2020.
is_2020 = reverse_years == 2020
l2 = int(np.count_nonzero(is_2020))
late_data = sorted2_arr[is_2020, :]
late_countries = late_data[:, 0]

# Every distinct country name, sorted; used for the x-axis ticks.
total_countries = np.unique(sorted_arr[:, 0])

# 2005 data to plot
early_countries = early_data[:, 0]
early_life_exp = early_data[:, 5]


# Matplotlib 3.6 renamed the bundled seaborn style sheets with a
# "seaborn-v0_8-" prefix and later dropped the old names, so prefer the new
# name and fall back to the old one on older installations.
_darkgrid_style = 'seaborn-v0_8-darkgrid'
if _darkgrid_style not in plt.style.available:
    _darkgrid_style = 'seaborn-darkgrid'
plt.style.use(_darkgrid_style)

# Wide figure shared by make_plot / slide / static_plot below.
fig, ax = plt.subplots(figsize=(25,8))

def make_plot(year):
    """Add one year's life-expectancy points to the shared axes ``ax``.

    The rows for ``year`` are sliced out of the year-sorted ``sorted_arr``
    using the cumulative end indices in ``l_list``; column 0 is plotted on
    the x-axis and column 5 on the y-axis. Title, axis labels, ticks and the
    legend are (re)applied on every call.

    Parameters
    ----------
    year : int
        A year contained in ``year_list``.

    Raises
    ------
    ValueError
        If ``year`` is not one of the values in ``year_list``.
    """
    matches = np.flatnonzero(year_list == year)
    if matches.size == 0:
        raise ValueError(f"year {year!r} is not in year_list")
    # Work with a plain Python int: indexing l_list with a one-element array
    # and calling int() on the result is deprecated in recent NumPy releases.
    position = int(matches[0])

    # The first year's rows start at 0; every later year starts where the
    # previous year's rows end.
    first_row = 0 if position == 0 else int(l_list[position - 1])
    last_row = int(l_list[position])
    data = sorted_arr[first_row:last_row, :]

    ax.plot(data[:, 0], data[:, 5], marker='o', ls='', label=str(year))
    ax.set_title('Life Expectancy by Country from 2005-2020')
    ax.set_xlabel('Country')
    ax.set_ylabel('Life Expectancy at Birth (Years)')
    ax.set_xticks(total_countries)
    ax.set_xticklabels(total_countries,rotation=90)
    ax.legend(loc='upper right')
    return

def static_plot(Static):
    """Show either all years at once or a slider-driven cumulative view.

    Parameters
    ----------
    Static : bool
        When truthy, every year in ``year_list`` is drawn with ``make_plot``.
        When falsy, the current axes are cleared and an interactive
        ``year2`` control (2007 to 2021, step 1) is created for ``slide``.
    """
    if Static:
        for yr in year_list:
            make_plot(yr)
    else:
        plt.cla()
        # NOTE(review): slide() takes no continuous_update parameter, so this
        # keyword may not actually reach the slider -- confirm against the
        # ipywidgets.interact documentation.
        interact(slide, year2=(2007,2021,1), continuous_update=True)

def slide(year2):
    """Clear the current axes, then plot each year from 2005 up to ``year2``.

    ``year2`` itself is excluded: the years drawn are 2005, 2006, ...,
    ``year2 - 1``, each added through ``make_plot``.
    """
    plt.cla()
    for yr in np.arange(2005, year2, 1):
        make_plot(yr)

# Hand static_plot to ipywidgets: interact builds a control for the Static
# argument from the initial value True and calls static_plot with its value.
interact(static_plot, Static=True)

<function __main__.static_plot(Static)>


%matplotlib inline


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import mplcursors
from geopy.geocoders import Nominatim
%matplotlib widget

df = pd.read_csv("https://raw.githubusercontent.com/npanczyk/npanczyk.github.io/main/happiness.csv")

# Base map of country outlines.
# NOTE(review): gpd.datasets appears to have been removed in geopandas 1.0;
# confirm the installed version still provides it before relying on this.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Mean of every numeric column per country (averaged over all rows of that
# country). numeric_only keeps this working on pandas versions that no
# longer silently skip non-numeric columns.
countries = df.groupby(["Country name"]).mean(numeric_only=True)
country = countries.reset_index()

# Geocode each country name. Exactly one latitude/longitude pair is appended
# per country -- NaN when the lookup finds nothing -- so the lists stay
# aligned with the rows of `country`.
geolocator = Nominatim(user_agent="Generosity")
lat = []
long = []
for name in country["Country name"]:
    location = geolocator.geocode(name, timeout=10)
    if location is None:
        lat.append(np.nan)
        long.append(np.nan)
    else:
        lat.append(location.latitude)
        long.append(location.longitude)

la = pd.DataFrame({"lat": lat, "long": long})

country["lat"] = la["lat"]
country["long"] = la["long"]

# Compact table holding just what the map needs.
c = pd.DataFrame({
    "Country name": country["Country name"],
    "Generosity": country["Generosity"],
    "Lat": country["lat"],
    "Long": country["long"],
})

fig, ax = plt.subplots()

ax.set_aspect('equal')

world.plot(ax=ax, color='white', edgecolor='black')
sc = ax.scatter(c["Long"], c["Lat"], s=1)
# Attach the hover cursor to the scatter points only, so sel.index is always
# the position of a row in `c` and never an index into the map polygons or
# into artists of other open figures.
mplcursors.cursor(sc, hover=True).connect(
    "add", lambda sel: sel.annotation.set_text(c["Generosity"].iloc[sel.index]))
plt.show()

Warning: Cannot change to a different GUI toolkit: widget. Using notebook instead.


#Load necessary libraries
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Libraries needed for the data import

import pandas as pd
import requests
import io
    
# Downloading the csv file from my GitHub account
url = "https://raw.githubusercontent.com/eguerr23/eguerr23.github.io/main/DataPanelWHR2021.csv" # Make sure the url is the raw version of the file on GitHub

df = pd.read_csv(url)

# Keep only the relevant columns, highest freedom score first.
df = df.sort_values(by='Freedom to make life choices', ascending=False)[['Country name', 'year', 'Freedom to make life choices']]

# Average each country's score over all of its rows, then keep the 50
# countries with the highest average. (Taking the first 50 rows of `df`
# would instead give the 50 highest individual country-year scores, with
# the same country repeated and its bars drawn on top of each other.)
relevant = (
    df.groupby('Country name', as_index=False)['Freedom to make life choices']
    .mean()
    .sort_values(by='Freedom to make life choices', ascending=False)
    .head(50)
)

# Visualization showing the 50 countries with the highest avg. score of
# freedom to make life choices over all years in the data set.
# The style and figure are set up BEFORE plotting so they apply to this
# chart; creating a figure afterwards only produces an extra empty one.
plt.style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (10, 10)
plt.figure(figsize=(10, 10))
plt.bar("Country name", "Freedom to make life choices", data=relevant, color="blue")
plt.title("Top 50 Average Countries")
plt.xlabel("Country")
plt.ylabel("Freedom to make life choices Score")
plt.xticks(rotation=90)

<Figure size 18000x36000 with 0 Axes>

<Figure size 18000x36000 with 0 Axes>


# Average each country's score over all of its rows, drop countries whose
# average is undefined, and keep the 50 lowest averages (still listed from
# highest to lowest). Taking the last 50 rows of the descending-sorted `df`
# would pick up the rows with missing scores, because NaN sorts last.
relevant1 = (
    df.groupby('Country name', as_index=False)['Freedom to make life choices']
    .mean()
    .dropna(subset=['Freedom to make life choices'])
    .sort_values(by='Freedom to make life choices', ascending=False)
    .tail(50)
)

# Style and figure are set up BEFORE plotting so they apply to this chart;
# creating a figure afterwards only produces an extra empty one.
plt.style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (10, 10)
plt.figure(figsize=(10, 10))
plt.bar("Country name", "Freedom to make life choices", data=relevant1, color="red")
plt.title("Lowest 50 Average Countries")
plt.xlabel("Country")
plt.ylabel("Freedom to make life choices Score")
plt.xticks(rotation=90)

<Figure size 3600x7200 with 0 Axes>

<Figure size 3600x7200 with 0 Axes>


%%html
<iframe src='https://shubhamjain2798.github.io/HappinessData-VegaLite-Viz/' height=800 width="100%"></iframe>


#load libraries
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np
import pandas as pd


df=pd.read_csv('https://raw.githubusercontent.com/npanczyk/npanczyk.github.io/main/happiness.csv')

# create dataframe with the columns of interest
data = df[['Country name', 'Log GDP per capita', 'Social support']]


# Group data by country and calculate the average of 'Log GDP per capita'
# and 'Social support'. The columns are selected with a list: selecting
# several group-by columns with a bare tuple is deprecated in pandas.
g_data = data.groupby('Country name')[['Log GDP per capita', 'Social support']].mean()

<ipython-input-18-18971e6370b2>:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  g_data = data.groupby('Country name')['Log GDP per capita', 'Social support'].mean()


# Order the per-country averages from highest to lowest Log GDP per capita.
g_data = g_data.sort_values('Log GDP per capita', ascending=False)

# Keep only the countries that have a Log GDP per capita value.
has_gdp = g_data['Log GDP per capita'].notna()
data_withGDP = g_data[has_gdp]

# Move the country-name index back into an ordinary column.
df_final = data_withGDP.reset_index()

# Multiply Social support by 10 so it is visible next to Log GDP on a
# shared axis, then take the first and last 25 rows of the ranking.
df_final["Social support"] = df_final["Social support"].mul(10)
top_25 = df_final.iloc[:25]
bottom_25 = df_final.iloc[-25:]


# Grouped (unstacked) bar chart with one group of bars per country in top_25.
# No secondary_y argument is passed: the previously named column
# 'Freedom and Social Support' does not exist in top_25, so it selected
# nothing and every bar is drawn against the single left-hand axis.
ax = top_25.plot(
    x='Country name',
    kind='bar',
    color=['g', 'xkcd:sky blue'],
    stacked=False,
    title='Top 25 countries by Avg. Log GDP per capita, with Avg. Social Support Rating',
    figsize=(20, 10),
)
# Label the axes object returned by the plot call rather than relying on
# pyplot's notion of the current axes.
ax.set_ylabel('Log GDP, Social Support (out of 10) ')

Text(0, 0.5, 'Log GDP, Social Support (out of 10) ')


# Grouped (unstacked) bar chart with one group of bars per country in
# bottom_25. No secondary_y argument is passed: the previously named column
# 'Freedom and Social Support' does not exist in bottom_25, so it selected
# nothing and every bar is drawn against the single left-hand axis.
ax = bottom_25.plot(
    x='Country name',
    kind='bar',
    color=['g', 'xkcd:sky blue'],
    stacked=False,
    title='Bottom 25 countries by Avg. Log GDP per capita, with Avg. Social Support Rating',
    figsize=(20, 10),
)
# Label and scale the axes object returned by the plot call rather than
# relying on pyplot's notion of the current axes.
ax.set_ylabel('Log GDP, Social Support (out of 10) ')
ax.set_ylim([0, 12])

(0.0, 12.0)

IS 445 - Group 5 - Final Project

Group Members: Nataly Panczyk, Betty Guerrero, Thomas McShane, Shubham Jain and Brian Rao

Objective

About the Data

Comparing Life Expectancies in Different Countries From 2005 to 2020

Generosity Rankings by Country and Year

Average Freedom to Make Life Choices by Country

Positive Affect in Various Countries as a Function of Year

IS 445 - Group 5 - Final Project

Group Members: Nataly Panczyk, Betty Guerrero, Thomas McShane, Shubham Jain and Brian Rao

Objective

About the Data

Comparing Life Expectancies in Different Countries From 2005 to 2020

Generosity Rankings by Country and Year

Average Freedom to Make Life Choices by Country

Positive Affect in Various Countries as a Function of Year

Top and Bottom Avg. Log GDP per Capita Countries, with Avg. Social Support Ratings