Criteria pollutant explorer
This section describes the methods used to summarize the data shown in MPCA’s Criteria pollutant explorer.
15.1 Documentation
Methods for calculating NAAQS design values are explained in 40 CFR Part 50. All design values are calculated exactly as written.
15.2 Data Sources
Data for all NAAQS Criteria pollutants except lead are in WAIR. Lead data can be accessed via the AQS API. TSP data are currently in StarLIMS. Design Values for PM2.5 and ozone are calculated automatically in WAIR and are in the aqs.vm_pm25_design_value and aqs.vm_ozone_design_value schemas respectively.
R
script for pulling lead data from AQS API
Click the button below for code.
library(data.table)
library(tidyverse)
library(lubridate)
library(glue)
library(httr)
library(jsonlite)
library(htmltools)
#Pull data for last year
<- year(today()) - 1
year
<- "" #your email
api_email <- "" #your AQS API key, to create account see: https://aqs.epa.gov/aqsweb/documents/data_api.html#signup
api_key
<- fread("X:/Programs/Air_Quality_Programs/Air Monitoring Data and Risks/Air Data Analysis Supporting Information/Information Tables/AQS Raw Data format.csv") %>%
col_names pull(Name)
<- rawToChar(GET(
outcomes glue(
"https://aqs.epa.gov/data/api/transactionsSample/byState?email={api_email}&key={api_key}¶m=14129&bdate={year}0101&edate={year}1231&state=27"
),
encode = "json"
$content)
)
<- purrr::map(outcomes, ~ if (str_detect(.x, '\\"status\\": \\"Success\\"'))
raw_data fromJSON(.x)$Data else
NULL
%>%
) rbindlist(fill = T) %>%
setkey()
<- mutate(raw_data, TT = "RD", AC = "I") %>%
pb_data select(TT, AC, state_code:uncertainty) %>%
set_names(col_names)
fwrite(pb_data, glue("X:/Programs/Air_Quality_Programs/Air Monitoring Data and Risks/6 Air Data/Monitoring Data/Ambient Air Monitoring Data - Test/MPCA Statewide Results/Criteria Pollutants/Lead/Raw Data/Lead Results MN {year}.txt"), sep = "|")
R
script for pulling TSP data from Tableau
Click the button below for code.
library(tidyverse)
library(data.table)
library(lubridate)
library(RODBC)
library(glue)
options(scipen = 999)
#Years of data you want to pull
<- 2020
years
#Use the name of your LIMS connection (check odbc data sources)
<- odbcConnect("LIMS32_2", uid = "kgavin", pwd = "tableau_12") #Connect to LIMS
lims_cnx
<- c("Transaction Type", "Action Indicator", "State Code / Tribal Indicator",
colnames "County Code / Tribal Code", "Site Number", "Parameter", "POC", "Duration Code",
"Reported Unit", "Method Code", "Sample Date", "Sample Begin Time",
"Reported Sample Value", "Null Data Code", "Collection Frequency Code",
"Monitor Protocol ID", "Qualifier Code - 1", "Qualifier Code - 2", "Qualifier Code - 3",
"Qualifier Code - 4", "Qualifier Code - 5", "Qualifier Code - 6", "Qualifier Code - 7",
"Qualifier Code - 8", "Qualifier Code - 9", "Qualifier Code - 10",
"Alternate Method Detection Limit", "Uncertainty Value")
for(i in years){
print(i)
<- sqlQuery(lims_cnx, glue("SELECT 'RD', 'I', STATECODE, COUNTYCODE, SITECODE, PARAMCODE, POC, SAMPDURATION, UNITCODE, METHODCODE,
data TO_CHAR(RUNDATE, 'YYYYMMDD'), TO_CHAR(RUNDATE, 'HH24:MI'), VALUE, NULLDATACODE
FROM LIMSSUPER.DAILYDATA
WHERE (RUNDATE>={{ts '{i}-01-01 00:00:00'}} AND RUNDATE<{{ts '{i+1}-01-01 00:00:00'}}
AND PARAMCODE = '11101')
ORDER BY PARAMCODE, SITEID, RUNDATE"),
as.is = T, max = 0
)
if(nrow(data) > 0) {
for(j in as.character(1:14)) data[[j]] <- NA
<- set_names(data, colnames)
data
fwrite(data,
glue("X:/Programs/Air_Quality_Programs/Air Monitoring Data and Risks/6 Air Data/Monitoring Data/Ambient Air Monitoring Data - Test/MPCA Statewide Results/Criteria Pollutants/TSP/Raw Data/TSP Results MN {i}.txt"),
sep = "|",
col.names = T)
}rm(data)
}
15.3 R scripts
Scripts for calculating NAAQS/MAAQS Design Values are in this folder. The PM2.5 and ozone scripts are deprecated as their Design Values are calculated in WAIR.
Once design values have been calculated for all pollutants, use this R script to merge together in one file.
###Compile all calculted NAAQS DVs into 1 File
### Includes connection to site table and standards
library(tidyverse)
library(RPostgreSQL)
library(data.table)
= "X:/Programs/Air_Quality_Programs/Air Monitoring Data and Risks/Air Data Analysis Supporting Information/Information Tables/AQ Standards.csv"
standards_file
#CSV file with your WAIR "username" and "password"
= "H:/WAIR credentials.csv"
WAIR_credentials_file
= fread(WAIR_credentials_file)
WAIR_creds
<- dbDriver("PostgreSQL")
drv
<- dbConnect(drv, dbname = "wair", host = 'eiger', user = WAIR_creds$username, password = WAIR_creds$password)
con
= dbGetQuery(con, statement = paste("SELECT * FROM aqs.site
sites LEFT JOIN core.counties_contiguousus counties
ON aqs.site.stateid = counties.state_fips AND aqs.site.cntyid = counties.cnty_fips
LEFT JOIN aqs.city_fips
ON aqs.site.stateid = aqs.city_fips.stateid AND aqs.site.cntyid = aqs.city_fips.cntyid AND
aqs.site.cityid = aqs.city_fips.cityid")) %>% setDT()
= dbGetQuery(con, statement = "SELECT * FROM aqs.monitor_type")
monitor_types
setDT(monitor_types)
= monitor_types[mon_type %in% c("SLAMS", "TRIBAL", "NON-EPA FEDERAL", "EPA")
monitor_types := substr(monitor_id, 1, 11)
][, monitor_id 1, .(mon_type)], by = .(monitor_id)] %>%
][, .SD[separate(monitor_id, c("stateid", "cntyid", "siteid"), convert = T) %>% setDT()
= function() {
read_dvs library(data.table)
= "X:/Programs/Air_Quality_Programs/Air Monitoring Data and Risks/3 Data Analysis Results/Criteria Pollutant Analysis/DV Indicator Files/DVs - Final"
folder list.files(folder, pattern = "NAAQS Final DV", full.names = T) %>% lapply(fread) %>% rbindlist(fill = T)
}
= function(addresses) {
format_addresses = c("0th", "1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th")
street_numbers = str_to_title(addresses) %>%
addresses str_replace_all(street_numbers %>% str_to_title() %>% paste(collapse = "|"), street_numbers)
return(addresses)
}
= read_dvs()
alldvs
= alldvs[Pollutant %in% c("PM10"), `:=` (POC = str_sub(AQSID, -1, -1))
alldvs `:=`(AQSID_POC = AQSID, AQSID = str_sub(AQSID, 1, 11), stdjoin = paste(Pollutant, Indicator, sep=": "))] %>%
][, separate(AQSID, c("stateid", "cntyid", "siteid"), remove = F, convert = T) %>% setDT()
== "Daily Background Concentration", DV_Valid := "Yes"]
alldvs[Indicator
=fread(standards_file)
standards
= standards[monitor_types[sites[alldvs, on = c("stateid", "cntyid", "siteid")], on = c("stateid", "cntyid", "siteid")], on = "stdjoin"]
alldvs
= alldvs[!is.na(site_name) & !is.na(mon_type), .(
alldvs
Pollutant,
Indicator,
PrimaryStandard,
SecondaryStandard,Standards_Unit = Units,
YearPromulgated,NAAQS = as.logical(NAAQS),
MAAQS = as.logical(MAAQS),
Active = as.logical(Active),
AQSID_POC,
DVYear,
DV,FinalDV_Units = Units,
DV_Valid,Address = format_addresses(street_addr),
Cityname = lapply(city_name, function(x) str_split(x, "\\ \\(RR")[[1]][1]),
County = cnty_name,
CountyFIPS = fips,
Latitude = lat,
Longitude = lon,
Siteid = siteid,
Sitename = site_name,
AveragingTime,
ComplianceTest,
StandardDescription,
DataYears,ChartName = ifelse(is.na(city_name), paste0(site_name, " (", str_sub(AQSID_POC, 8, 13), ")"),
paste0(lapply(city_name, function(x) str_split(x, "\\ \\(RR")[[1]][1]), " (", str_sub(AQSID_POC, 8, 13), ")") )
)]
##Write files
fwrite(alldvs,"X:/Programs/Air_Quality_Programs/Air Monitoring Data and Risks/3 Data Analysis Results/Criteria Pollutant Analysis/DV Indicator Files/DVs - Final/NAAQS_DVs.csv")
15.4 Tableau
The Criteria Data Explorer workbook is located here. Refresh the extract and adjust date ranges then publish to the PCA Tableau server. Email the MPCA Data Desk to update the workbook on the PCA website.