Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Employment FBS Updates #440

Draft
wants to merge 10 commits into
base: develop
Choose a base branch
from
48 changes: 32 additions & 16 deletions flowsa/data_source_scripts/BLS_QCEW.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from flowsa.flowbyfunctions import assign_fips_location_system
from flowsa.flowbyactivity import FlowByActivity
from flowsa.flowsa_log import log
from flowsa.naics import industry_spec_key
from flowsa.naics import industry_spec_key, return_max_sector_level


def BLS_QCEW_URL_helper(*, build_url, year, **_):
Expand Down Expand Up @@ -199,7 +199,7 @@ def estimate_suppressed_qcew(fba: FlowByActivity) -> FlowByActivity:
if fba.config.get('geoscale') == 'national':
fba = fba.query('Location == "00000"')
else:
log.critical('At a subnational scale, this will take a long time.')
log.critical('At a subnational scale, this might take a long time.')

fba2 = (fba
.assign(Unattributed=fba.FlowAmount.copy(),
Expand All @@ -210,9 +210,24 @@ def estimate_suppressed_qcew(fba: FlowByActivity) -> FlowByActivity:
'48-49': '4Y'}})
)

for level in [5, 4, 3, 2]:
# Subset the BLS data to keep only parent-child sectors up to the target sector level.
# Non-NAICS entries are not dropped: we want to keep all data, because some suppressed
# data will be attributed to these non-NAICS entries.

# determine max sector length for estimating suppressed data
max_level = return_max_sector_level(fba.config['industry_spec'])

# subset BLS data to drop all child naics after target sector level
fba3 = (
fba2
.query(f'ActivityProducedBy.str.len() <= {max_level}')
.reset_index(drop=True)
)

for level in range(max_level-1, 1, -1):
log.info(f"Identifying sector descendants for NAICS {level}")
descendants = pd.DataFrame(
fba2
fba3
.drop(columns='descendants')
.query(f'ActivityProducedBy.str.len() > {level}')
.assign(
Expand All @@ -235,8 +250,8 @@ def estimate_suppressed_qcew(fba: FlowByActivity) -> FlowByActivity:
'parent': 'ActivityProducedBy'})
)

fba2 = (
fba2
fba3 = (
fba3
.merge(descendants,
how='left',
on=['FlowName', 'Location', 'ActivityProducedBy'],
Expand All @@ -253,17 +268,17 @@ def estimate_suppressed_qcew(fba: FlowByActivity) -> FlowByActivity:
)
.drop(columns=['descendant_flows', 'descendants_y'])
)
fba2 = fba2.drop(columns=['descendants'])
fba3 = fba3.drop(columns=['descendants'])

indexed = (
fba2
.assign(n2=fba2.ActivityProducedBy.str.slice(stop=2),
n3=fba2.ActivityProducedBy.str.slice(stop=3),
n4=fba2.ActivityProducedBy.str.slice(stop=4),
n5=fba2.ActivityProducedBy.str.slice(stop=5),
n6=fba2.ActivityProducedBy.str.slice(stop=6),
location=fba2.Location,
category=fba2.FlowName)
fba3
.assign(n2=fba3.ActivityProducedBy.str.slice(stop=2),
n3=fba3.ActivityProducedBy.str.slice(stop=3),
n4=fba3.ActivityProducedBy.str.slice(stop=4),
n5=fba3.ActivityProducedBy.str.slice(stop=5),
n6=fba3.ActivityProducedBy.str.slice(stop=6),
location=fba3.Location,
category=fba3.FlowName)
.replace({'FlowAmount': {0: np.nan},
'n2': {'31': '3X', '32': '3X', '33': '3X',
'44': '4X', '45': '4X',
Expand Down Expand Up @@ -295,7 +310,8 @@ def fill_suppressed(
return flows

unsuppressed = indexed.copy()
for level in [2, 3, 4, 5, 6]:
for level in range(2, max_level, 1):
log.info(f"Estimating suppressed NAICS {level + 1}")
groupcols = ["{}{}".format("n", i) for i in range(2, level+1)] + [
'location', 'category']
unsuppressed = unsuppressed.groupby(
Expand Down
19 changes: 16 additions & 3 deletions flowsa/flowby.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,9 @@ def harmonize_geoscale(
fb_geoscale = geo.scale.from_string(self.config['geoscale'])
other_geoscale = geo.scale.from_string(other.config['geoscale'])

log.info(f"Harmonizing {self.full_name} {self.config['geoscale']} data "
f"with {other.full_name} {other.config['geoscale']} data")

fill_cols = self.config.get('fill_columns')
if fill_cols and 'Location' in fill_cols:
# Don't harmonize geoscales when updating Location
Expand Down Expand Up @@ -1096,9 +1099,18 @@ def proportionally_attribute(
left_on = attribute_cols + ['temp_location' if 'temp_location'
in fb else 'Location']
right_on = attribute_cols + ['Location']
for l in (left_on, right_on):
if 'Location' in self.config.get('fill_columns', []):
# if replacing df location with "other" location, drop location from merge columns
if 'Location' in self.config.get('fill_columns', []):
for l in (left_on, right_on):
l.remove('Location')
# if merging state data with county data, merge on the first 2 digits of the Location
# column using a temporary "temp_location" column
if (self.config['geoscale'] == 'state') & (other.config['geoscale'] == 'county'):
fb['temp_location'] = fb['Location'].str[:2]
other['temp_location'] = other['Location'].str[:2]
for l in (left_on, right_on):
l.append('temp_location')

merged = (
fb
.merge(other,
Expand Down Expand Up @@ -1127,7 +1139,8 @@ def proportionally_attribute(
unattributable.full_name,
other.full_name,
sorted(set(zip(unattributable.SectorProducedBy.fillna('N/A'),
unattributable.SectorConsumedBy.fillna('N/A'))))
unattributable.SectorConsumedBy.fillna('N/A'),
unattributable.Location)))
)
vlog.debug(
'Unattributed activities: \n {}'.format(
Expand Down
12 changes: 12 additions & 0 deletions flowsa/methods/flowbysectormethods/Employment_common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,29 @@ _bls_selection_fields_state: &bls_selection_fields_state
Class: Employment
Location: !include:Location_common.yaml:_state_location

_bls_selection_fields_county: &bls_selection_fields_county
selection_fields:
Class: Employment
Location: !include:Location_common.yaml:_county_location

_bls_load_and_clean: &bls_load_and_clean
clean_fba_before_mapping: !script_function:BLS_QCEW estimate_suppressed_qcew
clean_fba: !script_function:BLS_QCEW clean_qcew_for_fbs

bls_load_and_clean_national:
<<: *bls_selection_fields_national
<<: *bls_load_and_clean
geoscale: national

bls_load_and_clean_state:
<<: *bls_selection_fields_state
<<: *bls_load_and_clean
geoscale: state

bls_load_and_clean_county:
<<: *bls_selection_fields_county
<<: *bls_load_and_clean
geoscale: county

bls_allocation:
selection_fields:
Expand Down
14 changes: 14 additions & 0 deletions flowsa/methods/flowbysectormethods/Employment_county_2022.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
industry_spec:
default: NAICS_3
target_naics_year: 2017
geoscale: county

source_names:
Employment_state_2022: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: state
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_county
year: 2022
activity_sets:
qcew: !include:Employment_common.yaml:bls_allocation
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2010: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2010
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2012: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2012
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2013: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2013
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2014: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2014
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2015: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2015
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2016: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2016
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2017: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2017
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2018: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2018
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2019: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2019
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2020: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2020
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2021: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2021
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2022: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2022
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ geoscale: state
source_names:
Employment_national_2023: !include:Employment_common.yaml:employment_fbs_allocation
data_format: FBS
geoscale: national
attribution_source:
BLS_QCEW: !include:Employment_common.yaml:bls_load_and_clean_state
year: 2023
Expand Down
Loading
Loading