#data #pe_utils #_2023

The Bureau of Labor Statistics (BLS) is a key agency within the U.S. Department of Labor that collects, analyzes, and disseminates essential economic data related to employment, labor markets, inflation, and productivity. Its mission is to provide accurate and timely statistics to support public and private decision-making. I use BLS data for diligence and business analysis.

Link:

Script:

```py
# py_bls.py
import json
import time
import pandas
import sqlite3
import requests
from functools import reduce

# -------------------------------- #
# constants
# -------------------------------- #

# bls series codes and titles
bls_series_dict = {
    # employment
    'LNS11000000': 'civilian_labor_force',
    'LNS12000000': 'civilian_employment',
    'LNS13000000': 'civilian_unemployment',
    'LNS14000000': 'unemployment_rate',
    'CES0000000001': 'total_non_farm_employment',
    'CES0500000002': 'total_private_avg_weekly_hours_of_all_employees',
    'CES0500000007': 'total_private_avg_weekly_hours_of_prod_and_nonsup_employees',
    'CES0500000003': 'total_private_avg_hourly_earnings_of_all_employees',
    'CES0500000008': 'total_private_avg_hourly_earnings_of_prod_and_nonsup_employees',
    # productivity
    'PRS85006092': 'output_per_hour_non_farm_business_productivity',
    'PRS85006112': 'nonfarm_business_unit_labor_costs',
    'PRS85006152': 'nonfarm_business_real_hourly_compensation',
    'MPU4910012': 'private_nonfarm_business_multifactor_productivity_annual_index',
    # price indexes
    'CUUR0000SA0': 'cpi_for_all_urban_consumers_cpi_u_1982_84_100_unadjusted',
    'CUUR0000AA0': 'cpi_for_all_urban_consumers_cpi_u_1967_100_unadjusted',
    'CWUR0000SA0': 'cpi_for_urban_wage_earners_and_clerical_workers_cpi_w_1982_84_100_unadjusted',
    'CUUR0000SA0L1E': 'cpi_u_less_food_and_energy_unadjusted',
    'CWUR0000SA0L1E': 'cpi_w_less_food_and_energy_unadjusted',
    'WPSFD4': 'ppi_final_demand_seasonally_adjusted',
    'WPUFD4': 'ppi_final_demand_unadjusted',
    'WPUFD49104': 'ppi_final_demand_less_foods_and_energy_unadjusted',
    'WPUFD49116': 'ppi_final_demand_less_foods_energy_and_trade_services_unadjusted',
    'WPUFD49207': 'ppi_finished_goods_1982_100_unadjusted',
    'EIUIR': 'imports_all_commodities',
    'EIUIQ': 'exports_all_commodities',
    # compensation
    'CIU1010000000000A': 'employment_cost_index_eci_civilian_unadjusted',
    'CIU2010000000000A': 'eci_private_unadjusted',
    'CIU2020000000000A': 'eci_private_wage_and_salaries_unadjusted',
}

# -------------------------------- #
# functions
# -------------------------------- #

def clean_df_headers(bad_header_df):
    ''' Removes unwanted characters from dataframe headers '''
    # characters to replace
    replace_dict = {
        ' ': '_',
        '-': '_',
        ',': '',
        '.': '',
        '/': '_',
        '(': '',
        ')': '',
        '#': '_no',
        ':': '',
        '&': 'and',
        '+': 'plus',
        '*': '',
        "'": '',
        '%': 'pct',
    }
    # apply every replacement to each column name, then lowercase
    bad_header_df.columns = [
        reduce(lambda a, kv: a.replace(*kv), replace_dict.items(), col).lower()
        for col in bad_header_df.columns
    ]
    return bad_header_df


def get_bls_data(series_id: str, start_year: str, end_year: str):
    ''' Makes a dataframe from a BLS series of data '''
    # headers and payload
    headers = {'Content-type': 'application/json'}
    data = json.dumps(
        {"seriesid": [series_id], "startyear": start_year, "endyear": end_year}
    )
    # call the BLS public API
    response = requests.post(
        'https://api.bls.gov/publicAPI/v1/timeseries/data/',
        data=data,
        headers=headers,
    )
    # make dataframe from the json response
    json_data = json.loads(response.text)
    bls_df = pandas.DataFrame(json_data['Results']['series'][0]['data'])
    return bls_df


def upload_df_to_sqlite(d_frame, sqlite_db_name: str, table_name: str):
    ''' Uploads a dataframe to a table in a sqlite db '''
    # connect to the db
    conn = sqlite3.connect(sqlite_db_name)
    # cast to string for ease of upload
    d_frame = d_frame.astype(str)
    # upload, replacing any existing table
    d_frame.to_sql(f'bls_{table_name}', conn, if_exists='replace', index=False)
    # close the conn
    conn.close()

# -------------------------------- #
# execute functions
# -------------------------------- #

for key, value in bls_series_dict.items():
    # make df from bls series
    df_a = get_bls_data(key, '2014', '2024')
    # clean df headers
    df_b = clean_df_headers(df_a)
    # upload to sqlite
    upload_df_to_sqlite(df_b, 'db_bls.db', value)
    # print statement
    print(f'Uploaded: {value}')
    # pause 3 seconds between api calls
    time.sleep(3)
```
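
For reference, a minimal sketch of reading one of the loaded series back out of `db_bls.db` for analysis. This assumes the script above has already run, that the table name follows the `f'bls_{table_name}'` convention in `upload_df_to_sqlite`, and that the API returned the standard `year`, `period`, and `value` fields; the filename `py_bls_read.py` and the column handling are illustrative, not part of the original script.

```py
# py_bls_read.py (illustrative companion to py_bls.py)
import sqlite3
import pandas

# connect to the same db the loader writes to
conn = sqlite3.connect('db_bls.db')

# pull one series back out; 'bls_unemployment_rate' follows the f'bls_{value}' naming above
df = pandas.read_sql_query('SELECT year, period, value FROM bls_unemployment_rate', conn)
conn.close()

# everything was stored as strings, so cast the values before analysis
df['value'] = df['value'].astype(float)

# keep monthly observations (periods M01..M12) and build a sortable date column
df = df[df['period'].between('M01', 'M12')]
df['date'] = pandas.to_datetime(df['year'] + '-' + df['period'].str[1:] + '-01')

print(df.sort_values('date').tail())
```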