==============================================================================
HD 2.4.a smoke test - Stages 1-3 on representative years
==============================================================================

--- FY 2008 (era A) ---
  Stage 1 raw rows:    76,136
  Stage 2 in-scope rows:    21,051
    question='Expenditures by S&E field'                                            n=21,051
  Stage 3 joined rows:    21,051
  distinct discipline_coarse (non-NULL): 9
    'All'
    'Engineering'
    'Geosciences'
    'Life sciences'
    'Math & CS'
    'Other sciences nec'
    'Physical sciences'
    'Psychology'
    'Social sciences'
  quality_flag distribution:
                 'imputed'  n=    1,517  ( 7.21%)
                'reported'  n=   19,534  (92.79%)
  Sample rows (3 per question):
    q='Expenditures by S&E field'                          qno='02'     row='All'                                    col='Federal'                    data='21739'    status=None  coarse='All'                flag='reported'
    q='Expenditures by S&E field'                          qno='02'     row='All'                                    col='Federal'                    data='3329'     status=None  coarse='All'                flag='reported'
    q='Expenditures by S&E field'                          qno='02'     row='All'                                    col='Federal'                    data='305'      status=None  coarse='All'                flag='reported'

--- FY 2024 (era B) ---
  Stage 1 raw rows:   264,321
  Stage 2 in-scope rows:   219,785
    question='Capitalized equipment expenditures by field and source'               n=22,365
    question='Clinical trials'                                                      n=552
    question='Federal expenditures by field and agency'                             n=103,808
    question='Medical school expenditures'                                          n=162
    question='Nonfederal expenditures by field and source'                          n=92,898
  Stage 3 joined rows:   219,785
  distinct discipline_coarse (non-NULL): 10
    'All'
    'Engineering'
    'Geosciences'
    'Life sciences'
    'Math & CS'
    'Non-S&E'
    'Other sciences nec'
    'Physical sciences'
    'Psychology'
    'Social sciences'
  quality_flag distribution:
                 'imputed'  n=    6,142  ( 2.79%)
                'reported'  n=  213,643  (97.21%)
  Sample rows (3 per question):
    q='Capitalized equipment expenditures by field and so' qno='14A'    row='Computer and information sciences, all' col='Federal'                    data='783'      status=None  coarse='Math & CS'          flag='reported'
    q='Capitalized equipment expenditures by field and so' qno='14A'    row='Computer and information sciences, all' col='Federal'                    data='35'       status=None  coarse='Math & CS'          flag='reported'
    q='Capitalized equipment expenditures by field and so' qno='14A'    row='Computer and information sciences, all' col='Federal'                    data='2532'     status=None  coarse='Math & CS'          flag='reported'
    q='Clinical trials'                                    qno='05'     row='Federal'                                col=None                         data='599'      status=None  coarse=''                   flag='reported'
    q='Clinical trials'                                    qno='05'     row='Federal'                                col=None                         data='1562'     status=None  coarse=''                   flag='reported'
    q='Clinical trials'                                    qno='05'     row='Federal'                                col=None                         data='63'       status=None  coarse=''                   flag='reported'
    q='Federal expenditures by field and agency'           qno='09A'    row='Computer and information sciences, all' col='DOD'                        data='205'      status='i'   coarse='Math & CS'          flag='imputed'
    q='Federal expenditures by field and agency'           qno='09A'    row='Computer and information sciences, all' col='DOD'                        data='0'        status=None  coarse='Math & CS'          flag='reported'
    q='Federal expenditures by field and agency'           qno='09A'    row='Computer and information sciences, all' col='DOD'                        data='2066'     status=None  coarse='Math & CS'          flag='reported'
    q='Medical school expenditures'                        qno='04'     row='Total'                                  col=None                         data='1613'     status=None  coarse=''                   flag='reported'
    q='Medical school expenditures'                        qno='04'     row='Total'                                  col=None                         data='585'      status=None  coarse=''                   flag='reported'
    q='Medical school expenditures'                        qno='04'     row='Total'                                  col=None                         data='498'      status=None  coarse=''                   flag='reported'
    q='Nonfederal expenditures by field and source'        qno='11A'    row='Computer and information sciences, all' col='All other sources'          data='48'       status=None  coarse='Math & CS'          flag='reported'
    q='Nonfederal expenditures by field and source'        qno='11A'    row='Computer and information sciences, all' col='All other sources'          data='0'        status=None  coarse='Math & CS'          flag='reported'
    q='Nonfederal expenditures by field and source'        qno='11A'    row='Computer and information sciences, all' col='All other sources'          data='0'        status=None  coarse='Math & CS'          flag='reported'

--- Smoke-test boundaries ---
  No assertions raised: Stage 3 crosswalk-coverage and status-codeset checks passed for all years tested.
  No parquet written: this is a Stages-1-3-only smoke test.
  Wall time (full smoke test, all years): 0.97s
