Child height and weight HAZ summaries for multiple countries

Data details

The dataset has 2,851 rows and 12 columns. It is built from the data source¹ and stored as a pin on our pins board. You can access this pin from a connection to posit.byui.edu using hathawayj/childhealth_summary.

Variable description

subjid: unique identifier of each child
sex: Male or Female
country: Label for the varied countries
haz_mean: The average HAZ score over all measurements
waz_mean: The average WAZ score over all measurements
observations: Number of observations for that subject
agedays_last: The age in days at the last HAZ and WAZ measurement
agedays_first: The age in days at the first HAZ and WAZ measurement
haz_last: The last HAZ measurement on the subject for age in days
haz_first: The first HAZ measurement on the subject for age in days
waz_last: The last WAZ measurement on the subject for age in days
waz_first: The first WAZ measurement on the subject for age in days

Variable summary

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
haz_mean	280	0.90	-0.85	1.23	-5.99	-1.70	-0.91	-0.02	3.10	▁▂▇▆▁
waz_mean	47	0.98	-0.52	1.13	-4.61	-1.26	-0.47	0.22	4.03	▁▃▇▂▁
observations	0	1.00	18.43	9.61	1.00	8.00	25.00	26.00	27.00	▃▁▁▁▇
agedays_last	3	1.00	895.35	691.06	0.00	729.00	730.00	734.00	2558.00	▂▇▁▁▂
agedays_first	0	1.00	1.29	3.34	0.00	0.00	0.00	1.00	17.00	▇▁▁▁▁
haz_last	305	0.89	-1.13	1.45	-6.72	-2.13	-1.24	-0.12	5.33	▁▅▇▂▁
haz_first	1996	0.30	0.26	1.52	-9.82	-0.71	0.37	1.12	4.72	▁▁▂▇▂
waz_first	0	1.00	-0.43	1.15	-5.98	-1.15	-0.40	0.35	3.60	▁▁▇▇▁
waz_last	8	1.00	-0.63	1.33	-5.83	-1.53	-0.64	0.23	4.42	▁▂▇▃▁

Variable type: character

skim_variable	complete_rate	min	max	n_unique
subjid	1	1	18	2851
sex	1	4	6	2
country	1	4	13	10

Explore generating code using R

pacman::p_load(tidyverse, fs, sf, arrow, googledrive, downloader, fs, glue, rvest, pins, connectapi)

# Download the three raw sources ----

# 1. `cpp` data frame, shipped as an .rda file in the HBGDki/hbgd repository.
cpp_rda <- tempfile()
download(
  url = "https://github.com/HBGDki/hbgd/raw/master/data/cpp.rda",
  destfile = cpp_rda,
  mode = "wb"
)
load(cpp_rda) # creates the `cpp` object

# 2. `smocc_hgtwgt` data frame, pinned to a fixed commit of brokenstick.
smocc_rda <- tempfile()
download(
  url = "https://github.com/stefvanbuuren/brokenstick/raw/71dc99e62ce57b58d5c1d2a1074fbd4bf394e559/data/smocc_hgtwgt.rda",
  destfile = smocc_rda,
  mode = "wb"
)
load(smocc_rda) # creates the `smocc_hgtwgt` object

# 3. MALED file: located by name on the "byuids_data" shared Google Drive,
#    downloaded to a temporary path and read as CSV into `dat`.
maled_file <- shared_drive_find("byuids_data") |>
  drive_ls() |>
  filter(stringr::str_detect(name, "MALED"))
maled_csv <- tempfile()
drive_download(maled_file, path = maled_csv)
dat <- read_csv(maled_csv)

# Format source data ----
# Keep only the columns of interest from each raw source.

# Netherlands source (`smocc_hgtwgt`): ids, sex, age, size and z-scores.
childhealth_dutch <- smocc_hgtwgt |>
  select(all_of(c(
    "subjid", "sex", "agedays", "gagebrth", "htcm", "wtkg", "haz", "waz"
  )))

# United States source (`cpp`): same measurements plus maternal/SES fields.
childhealth_us <- cpp |>
  select(all_of(c(
    "subjid", "sex", "agedays", "gagebrth", "htcm", "wtkg", "haz", "waz",
    "mrace", "mage", "meducyrs", "ses"
  )))

# MALED source (`dat`): map the verbose CSV headers to short names.
# Names of the vector are the new column names; values are the CSV headers.
maled_lookup <- c(
  subjid = "Participant ID",
  sex = "Sex",
  country = "Country",
  agedays = "Age (days)",
  wtkg = "Weight (kg)",
  stcm = "Stature (cm)",
  htcm = "Height (cm)",
  lncm = "Recumbent length (cm)",
  lh_used = "Recumbent length or height used for stature",
  hccm = "Head circumference (cm)",
  lhaz = "Length- or height-for-age z-score",
  haz = "Height-for-age z-score",
  laz = "Length-for-age z-score",
  waz = "Weight-for-age z-score",
  hcaz = "Head circumference-for-age z-score",
  whz = "Weight-for-length or -height z-score"
)
childhealth_maled <- dat |>
  select(all_of(maled_lookup))


# combine to one file ----
# Stack the three sources into one long table of measurements, then collapse
# it to one row per child (subjid, sex, country) with mean, first and last
# HAZ/WAZ values.
childhealth_summary <- bind_rows(
  childhealth_dutch |>
    select(subjid, agedays, sex, htcm, wtkg, haz, waz) |>
    mutate(country = "Netherlands", subjid = as.character(subjid)),

  # MALED: stature (stcm) fills the height column and the length-or-height
  # z-score (lhaz) is used as HAZ.
  childhealth_maled |>
    select(subjid, agedays, sex, htcm = stcm, wtkg, haz = lhaz, waz, country),

  childhealth_us |>
    select(subjid, agedays, sex, htcm, wtkg, haz, waz) |>
    mutate(country = "United States", subjid = as.character(subjid))
) |>
  as_tibble() |>
  # Sort by age first so that, within each child, element 1 is the earliest
  # measurement and element n() is the latest.
  arrange(agedays) |>
  group_by(subjid, sex, country) |>
  summarise(
    haz_mean = mean(haz, na.rm = TRUE),
    # na.rm = TRUE here as well, matching haz_mean: without it a single
    # missing WAZ measurement turns the child's whole average into NA.
    # A child with no valid measurements still gets NaN.
    waz_mean = mean(waz, na.rm = TRUE),
    observations = n(),
    agedays_last = agedays[n()],
    agedays_first = agedays[1],
    haz_last = haz[n()],
    haz_first = haz[1],
    waz_first = waz[1],
    waz_last = waz[n()],
    # Return an ungrouped tibble (replaces a trailing ungroup()).
    .groups = "drop"
  )

# push to board ----
# Publish the summary as a parquet pin readable by everyone
# (access_type = "all"), then give its content item a vanity URL under
# data/<pin_name>.

# Single source of truth for the pin name: used for the write, the metadata
# lookup and the vanity URL, instead of relying on pin_write() guessing the
# name from the object's variable name.
pin_name <- "childhealth_summary"

board <- board_connect()
pin_write(
  board,
  childhealth_summary,
  name = pin_name,
  type = "parquet",
  access_type = "all"
)

# Look up the content id of the pin just written and attach the vanity URL.
meta <- pin_meta(board, paste0("hathawayj/", pin_name))
client <- connect()
my_app <- content_item(client, meta$local$content_id)
set_vanity_url(my_app, paste0("data/", pin_name))

Access data

This data is available to all.

Direct Download: childhealth_summary.parquet

R and Python Download:

URL Connections:

For public data, any user can connect and read the data using pins::board_connect_url() in R.

library(pins)
# Register the pin's public vanity URL under the alias "dat", then read the
# pin through that alias.
pin_urls <- c(dat = "https://posit.byui.edu/data/childhealth_summary/")
dat <- board_connect_url(pin_urls) |>
  pin_read("dat")

Use this custom function in Python to have the data in a Pandas DataFrame.

import pandas as pd
import requests
from io import BytesIO

def read_url_pin(name, timeout=30):
    """Read a public pin from posit.byui.edu into a pandas DataFrame.

    Args:
        name: Pin name as it appears in the vanity URL, e.g.
            "childhealth_summary". The parquet file is fetched from
            https://posit.byui.edu/data/<name>/<name>.parquet.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server raises instead of hanging forever.

    Returns:
        The pin as a pandas DataFrame when the server answers with HTTP 200.
        For any other status code a message is printed and None is returned.
    """
    url = "https://posit.byui.edu/data/" + name + "/" + name + ".parquet"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None
    # Wrap the raw bytes in a file-like object so pandas can parse them.
    return pd.read_parquet(BytesIO(response.content))

# Example usage: fetch the public childhealth_summary pin. The result is a
# pandas DataFrame, or None when the server does not answer with HTTP 200.
pandas_df = read_url_pin("childhealth_summary")

Authenticated Connection:

Our connect server is https://posit.byui.edu which you assign to your CONNECT_SERVER environment variable. You must create an API key and store it in your environment under CONNECT_API_KEY.

Read more about environment variables and the pins package to understand how these environment variables are stored and accessed in R and Python with pins.

library(pins)
# Connect using the CONNECT_SERVER / CONNECT_API_KEY environment variables,
# then read the pin by its "owner/name" identifier.
board <- board_connect(auth = "auto")
dat <- board |>
  pin_read("hathawayj/childhealth_summary")

import os
from pins import board_rsconnect
from dotenv import load_dotenv
# Load variables from a .env file (if any) into the process environment.
load_dotenv()

# Build the board from the server URL and API key stored in the environment,
# then read the pin by its "owner/name" identifier.
board = board_rsconnect(
    server_url=os.environ.get('CONNECT_SERVER'),
    api_key=os.environ.get('CONNECT_API_KEY'),
)
dat = board.pin_read("hathawayj/childhealth_summary")

Footnotes

https://github.com/hafen/hbgd ↩︎