{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2026-04-25T23:12:16.601159+00:00",
  "custom_fields": {},
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 1,
    "enabled": true,
    "entries": {
      "Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:9ae49ecd888c47efe89f92c3d018fa04",
        "ext": "pdf",
        "id": "c8564727-3e1a-4c92-833a-a822ccb072ab",
        "key": "Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf",
        "links": {
          "content": "https://zenodo.org/api/records/19774692/files/Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf/content",
          "iiif_api": "https://zenodo.org/api/iiif/record:19774692:Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf/full/full/0/default.png",
          "iiif_base": "https://zenodo.org/api/iiif/record:19774692:Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf",
          "iiif_canvas": "https://zenodo.org/api/iiif/record:19774692/canvas/Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf",
          "iiif_info": "https://zenodo.org/api/iiif/record:19774692:Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf/info.json",
          "self": "https://zenodo.org/api/records/19774692/files/Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf"
        },
        "metadata": {
          "height": 792,
          "width": 612
        },
        "mimetype": "application/pdf",
        "size": 2457733,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 2457733
  },
  "id": "19774692",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/19774692/access",
    "access_grants": "https://zenodo.org/api/records/19774692/access/grants",
    "access_links": "https://zenodo.org/api/records/19774692/access/links",
    "access_request": "https://zenodo.org/api/records/19774692/access/request",
    "access_users": "https://zenodo.org/api/records/19774692/access/users",
    "archive": "https://zenodo.org/api/records/19774692/files-archive",
    "archive_media": "https://zenodo.org/api/records/19774692/media-files-archive",
    "communities": "https://zenodo.org/api/records/19774692/communities",
    "communities-suggestions": "https://zenodo.org/api/records/19774692/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.19774692",
    "draft": "https://zenodo.org/api/records/19774692/draft",
    "file_modification": "https://zenodo.org/api/records/19774692/file-modification",
    "files": "https://zenodo.org/api/records/19774692/files",
    "latest": "https://zenodo.org/api/records/19774692/versions/latest",
    "latest_html": "https://zenodo.org/records/19774692/latest",
    "media_files": "https://zenodo.org/api/records/19774692/media-files",
    "parent": "https://zenodo.org/api/records/19774691",
    "parent_doi": "https://doi.org/10.5281/zenodo.19774691",
    "parent_doi_html": "https://zenodo.org/doi/10.5281/zenodo.19774691",
    "parent_html": "https://zenodo.org/records/19774691",
    "preview_html": "https://zenodo.org/records/19774692?preview=1",
    "quota_increase": "https://zenodo.org/api/records/19774692/quota-increase",
    "request_deletion": "https://zenodo.org/api/records/19774692/request-deletion",
    "requests": "https://zenodo.org/api/records/19774692/requests",
    "reserve_doi": "https://zenodo.org/api/records/19774692/draft/pids/doi",
    "self": "https://zenodo.org/api/records/19774692",
    "self_doi": "https://doi.org/10.5281/zenodo.19774692",
    "self_doi_html": "https://zenodo.org/doi/10.5281/zenodo.19774692",
    "self_html": "https://zenodo.org/records/19774692",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:19774692/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:19774692/sequence/default",
    "thumbnails": {
      "10": "https://zenodo.org/api/iiif/record:19774692:Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf/full/%5E10,/0/default.jpg",
      "100": "https://zenodo.org/api/iiif/record:19774692:Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf/full/%5E100,/0/default.jpg",
      "1200": "https://zenodo.org/api/iiif/record:19774692:Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf/full/%5E1200,/0/default.jpg",
      "250": "https://zenodo.org/api/iiif/record:19774692:Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf/full/%5E250,/0/default.jpg",
      "50": "https://zenodo.org/api/iiif/record:19774692:Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf/full/%5E50,/0/default.jpg",
      "750": "https://zenodo.org/api/iiif/record:19774692:Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf/full/%5E750,/0/default.jpg"
    },
    "versions": "https://zenodo.org/api/records/19774692/versions"
  },
  "media_files": {
    "count": 1,
    "enabled": true,
    "entries": {
      "Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf.ptif": {
        "access": {
          "hidden": true
        },
        "ext": "ptif",
        "id": "1b75e85c-ea14-402b-9cc6-dd394a0ff818",
        "key": "Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf.ptif",
        "links": {
          "content": "https://zenodo.org/api/records/19774692/files/Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf.ptif/content",
          "self": "https://zenodo.org/api/records/19774692/files/Bal_Puhan_RAG_Retrieval_Benchmarking_Paper.pdf.ptif"
        },
        "metadata": null,
        "mimetype": "application/octet-stream",
        "processor": {
          "source_file_id": "c8564727-3e1a-4c92-833a-a822ccb072ab",
          "status": "finished",
          "type": "image-tiles"
        },
        "size": 0,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "copyright": "\u00a9 2026 Devi Prasad Bal and Subhashree Puhan.",
    "creators": [
      {
        "affiliations": [
          {
            "id": "edmo:4475",
            "identifiers": [
              {
                "identifier": "edmo:4475",
                "scheme": "edmo"
              }
            ],
            "name": "The University of Texas at Austin"
          }
        ],
        "person_or_org": {
          "family_name": "Bal",
          "given_name": "Devi Prasad",
          "identifiers": [
            {
              "identifier": "0009-0007-6698-8276",
              "scheme": "orcid"
            }
          ],
          "name": "Bal, Devi Prasad",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "id": "edmo:4475",
            "identifiers": [
              {
                "identifier": "edmo:4475",
                "scheme": "edmo"
              }
            ],
            "name": "The University of Texas at Austin"
          }
        ],
        "person_or_org": {
          "family_name": "Puhan",
          "given_name": "Subhashree",
          "identifiers": [
            {
              "identifier": "0009-0002-3131-729X",
              "scheme": "orcid"
            }
          ],
          "name": "Puhan, Subhashree",
          "type": "personal"
        }
      }
    ],
    "description": "<p>Retrieval-Augmented Generation (RAG) offers a well-established path to grounding large language model (LLM) outputs in external knowledge, yet the question of which retrieval strategy works best in a high-stakes domain such as biomedicine has not received the controlled, multi-metric treatment it deserves. This paper presents a systematic empirical comparison of five retrieval strategies&mdash;Dense Vector Search, Hybrid BM25 + Dense retrieval, Cross-Encoder Reranking, Multi-Query Expansion, and Maximal Marginal Relevance (MMR)&mdash;within a biomedical question-answering RAG pipeline. All strategies share a fixed generation model (GPT-4o-mini), a common vector store (ChromaDB), and OpenAI's text-embedding-3-small embeddings, ensuring that observed differences are attributable to retrieval alone. Evaluation is conducted on 250 question-answer (QA) pairs drawn from a preprocessed subset of the BioASQ benchmark (rag-mini-bioasq) using four DeepEval metrics: contextual precision, contextual recall, faithfulness, and answer relevancy, each reported with 95% confidence intervals computed over 250 samples. A no-context ablation is included as a lower bound. Cross-Encoder Reranking achieves the best composite score (0.827) and highest contextual precision (0.852), confirming that query-document interaction yields measurable retrieval gains. Multi-Query Expansion, despite its recall-oriented design, produces the weakest contextual precision (0.671), suggesting naive query diversification introduces retrieval noise. MMR sacrifices answer relevancy for diversity, while the Dense baseline (composite 0.822) falls within 0.005 points of the top strategy. All RAG conditions dramatically outperform the no-context ablation on answer relevancy (0.658&ndash;0.701 vs. 0.287), confirming the practical value of retrieval. The full pipeline, hyperparameters, and evaluation code are publicly available at <a class=\"underline underline underline-offset-2 decoration-1 decoration-current/40 hover:decoration-current focus:decoration-current\" href=\"https://github.com/deviprasadbal/RAGHealthcareRetrievalStrategies\">https://github.com/deviprasadbal/RAGHealthcareRetrievalStrategies</a>.</p>",
    "languages": [
      {
        "id": "eng",
        "title": {
          "en": "English"
        }
      }
    ],
    "publication_date": "2026-04-25",
    "publisher": "Zenodo",
    "related_identifiers": [
      {
        "identifier": "https://github.com/deviprasadbal/RAGHealthcareRetrievalStrategies/",
        "relation_type": {
          "id": "issupplementedby",
          "title": {
            "de": "Wird erg\u00e4nzt durch",
            "en": "Is supplemented by"
          }
        },
        "resource_type": {
          "id": "software",
          "title": {
            "de": "Software",
            "en": "Software"
          }
        },
        "scheme": "url"
      },
      {
        "identifier": "arXiv:2605.02520",
        "relation_type": {
          "id": "isidenticalto",
          "title": {
            "de": "Ist identisch mit",
            "en": "Is identical to"
          }
        },
        "resource_type": {
          "id": "publication-preprint",
          "title": {
            "de": "Preprint",
            "en": "Preprint"
          }
        },
        "scheme": "arxiv"
      }
    ],
    "resource_type": {
      "id": "publication-preprint",
      "title": {
        "de": "Preprint",
        "en": "Preprint"
      }
    },
    "rights": [
      {
        "description": {
          "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
        },
        "icon": "cc-by-icon",
        "id": "cc-by-4.0",
        "props": {
          "scheme": "spdx",
          "url": "https://creativecommons.org/licenses/by/4.0/legalcode"
        },
        "title": {
          "en": "Creative Commons Attribution 4.0 International"
        }
      }
    ],
    "subjects": [
      {
        "subject": "Retrieval-Augmented Generation"
      },
      {
        "subject": "biomedical question answering"
      },
      {
        "subject": "information retrieval"
      },
      {
        "subject": "dense retrieval"
      },
      {
        "subject": "hybrid search"
      },
      {
        "subject": "cross-encoder reranking"
      },
      {
        "subject": "multi-query expansion"
      },
      {
        "subject": "maximal marginal relevance"
      },
      {
        "subject": "BioASQ"
      },
      {
        "subject": "RAG"
      },
      {
        "subject": "evaluation"
      },
      {
        "subject": "DeepEval"
      }
    ],
    "title": "Benchmarking Retrieval Strategies for Biomedical Retrieval-Augmented Generation: A Controlled Empirical Study",
    "version": "1.0.0"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "1625137"
      },
      "settings": {
        "accept_conditions_text": null,
        "allow_guest_requests": false,
        "allow_user_requests": false,
        "secret_link_expiration": 0
      }
    },
    "communities": {},
    "id": "19774691",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.19774691",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.19774692",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:19774692",
      "provider": "oai"
    }
  },
  "revision_id": 6,
  "stats": {
    "all_versions": {
      "data_volume": 366202217.0,
      "downloads": 149,
      "unique_downloads": 114,
      "unique_views": 164,
      "views": 187
    },
    "this_version": {
      "data_volume": 366202217.0,
      "downloads": 149,
      "unique_downloads": 114,
      "unique_views": 164,
      "views": 187
    }
  },
  "status": "published",
  "swh": {},
  "updated": "2026-05-05T03:53:05.874530+00:00",
  "versions": {
    "index": 1,
    "is_latest": true
  }
}