{"id":15757,"date":"2025-07-11T15:56:20","date_gmt":"2025-07-11T14:56:20","guid":{"rendered":"https:\/\/bestpractice.bmj.com\/info\/?page_id=15757"},"modified":"2025-07-15T19:51:12","modified_gmt":"2025-07-15T18:51:12","slug":"blog","status":"publish","type":"page","link":"https:\/\/bestpractice.bmj.com\/info\/us\/blog\/","title":{"rendered":"Blog"},"content":{"rendered":"","protected":false},"excerpt":{"rendered":"","protected":false},"author":3,"featured_media":0,"parent":0,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"marketing_new.php","meta":{"_acf_changed":false,"footnotes":""},"class_list":["post-15757","page","type-page","status-publish","hentry"],"acf":{"components":[{"choose_component":"marketingImage","hide":"","logged_in":"","logged_out":"","grey_background":[],"background_color":"","background_image":null,"title_text_color":"","title":"The AI check-up: we pitted BMJ Best Practice against Gen-AI. The results weren't even close.","text":"","image":null,"qr_image":null,"circular_image":null,"button_text":"","button_url":"","button2_text":"","button2_url":"","video_embed":"","quote":"","quote_author":"","author_job_title":"","feature_iconsimages":null,"bullets":"none","bullet_text":null,"expiration_date":null,"review_type":"normal","reviews":null,"feature_image_cards":null,"feature_icon_cards":null,"podcast_date":null,"podcast_title":"","podcast_text":"","podcast_embed_script":"","campaign_advert_link":"","monthly":{"show_monthly_tab":false,"strikethrough_price":"","price_text":"","per_month_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"annual":{"show_annual_tab":false,"packages":null,"default_price":"","default_strikethrough_price":"","default_per_year_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"advisory_panel":null,"article_list_fields":null,"subscription_tab_main_heading":"","subscription_tabs":null,"accordion_for_page":null,"embed_code":""},{"choose_compone
nt":"generic","hide":"","logged_in":"","logged_out":"","grey_background":"","background_color":"","background_image":null,"title_text_color":"","title":"","text":"Picture this: It's 2 AM, you're covering the ICU, and a complex patient lands on your doorstep. A 68-year-old patient with newly diagnosed atrial fibrillation, diabetes, and mild kidney disease needs anticoagulation decisions - but which agent, what dose, and how do you navigate the contraindications? In the old days, you'd call your most trusted colleague for a curbside consult. Today, you might be tempted to ask ChatGPT or Grok.\r\n\r\nThe question keeping many of us awake (beyond those 2 AM consults) is whether generative AI can truly serve as that reliable clinical advisor. Can these impressive language models deliver the nuanced, evidence-based guidance we need for high-stakes decisions? Or should we stick with the clinical decision support tools we've trusted for years?\r\n\r\nTo find out, we conducted an informal head-to-head evaluation that might surprise 
you.","image":null,"qr_image":null,"circular_image":null,"button_text":"","button_url":"","button2_text":"","button2_url":"","video_embed":"","quote":"","quote_author":"","author_job_title":"","feature_iconsimages":null,"bullets":"none","bullet_text":null,"expiration_date":null,"review_type":"normal","reviews":null,"feature_image_cards":null,"feature_icon_cards":null,"podcast_date":null,"podcast_title":"","podcast_text":"","podcast_embed_script":"","campaign_advert_link":"","monthly":{"show_monthly_tab":false,"strikethrough_price":"","price_text":"","per_month_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"annual":{"show_annual_tab":false,"packages":null,"default_price":"","default_strikethrough_price":"","default_per_year_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"advisory_panel":null,"article_list_fields":null,"subscription_tab_main_heading":"","subscription_tabs":null,"accordion_for_page":null,"embed_code":""},{"choose_component":"generic","hide":"","logged_in":"","logged_out":"","grey_background":"","background_color":"","background_image":null,"title_text_color":"","title":"The clinical scenarios: where the rubber meets the road","text":"We selected seven clinical questions that represent the bread and butter of modern practice\u2014the scenarios where we most need reliable guidance. These ranged from diagnostic challenges like determining the right tests for a patient with suspected multiple sclerosis, to complex management decisions like treating a COPD exacerbation in a patient with diabetes, to navigating anticoagulation in a patient with permanent atrial fibrillation.\r\n\r\nEach question was designed to test not just basic medical knowledge, but the kind of contextual, evidence-based reasoning that separates good clinical advice from dangerous oversimplification. 
Think about managing a patient with suspected celiac disease\u2014it's not enough to know the tests; you need to understand the sequence, the interpretation, and the clinical context that guides decision-making.","image":null,"qr_image":null,"circular_image":null,"button_text":"","button_url":"","button2_text":"","button2_url":"","video_embed":"","quote":"","quote_author":"","author_job_title":"","feature_iconsimages":null,"bullets":"none","bullet_text":null,"expiration_date":null,"review_type":"normal","reviews":null,"feature_image_cards":null,"feature_icon_cards":null,"podcast_date":null,"podcast_title":"","podcast_text":"","podcast_embed_script":"","campaign_advert_link":"","monthly":{"show_monthly_tab":false,"strikethrough_price":"","price_text":"","per_month_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"annual":{"show_annual_tab":false,"packages":null,"default_price":"","default_strikethrough_price":"","default_per_year_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"advisory_panel":null,"article_list_fields":null,"subscription_tab_main_heading":"","subscription_tabs":null,"accordion_for_page":null,"embed_code":""},{"choose_component":"generic","hide":"","logged_in":"","logged_out":"","grey_background":"","background_color":"","background_image":null,"title_text_color":"","title":"The experiment: David vs. Goliath (times two)","text":"We posed identical clinical questions to three different systems: BMJ Best Practice, our established evidence-based champion, and two of the most sophisticated generative AI models available\u2014Grok 3.0 and ChatGPT 4.0.\r\nThe evaluation criteria were straightforward but rigorous: clinical validity, evidence-based approach, comprehensiveness, contextual relevance, and patient safety considerations. 
Essentially, we asked: \"If we were residents seeking guidance, which response would actually help us provide better patient care?\"","image":null,"qr_image":null,"circular_image":null,"button_text":"","button_url":"","button2_text":"","button2_url":"","video_embed":"","quote":"","quote_author":"","author_job_title":"","feature_iconsimages":null,"bullets":"none","bullet_text":null,"expiration_date":null,"review_type":"normal","reviews":null,"feature_image_cards":null,"feature_icon_cards":null,"podcast_date":null,"podcast_title":"","podcast_text":"","podcast_embed_script":"","campaign_advert_link":"","monthly":{"show_monthly_tab":false,"strikethrough_price":"","price_text":"","per_month_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"annual":{"show_annual_tab":false,"packages":null,"default_price":"","default_strikethrough_price":"","default_per_year_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"advisory_panel":null,"article_list_fields":null,"subscription_tab_main_heading":"","subscription_tabs":null,"accordion_for_page":null,"embed_code":""},{"choose_component":"generic","hide":"","logged_in":"","logged_out":"","grey_background":"","background_color":"","background_image":null,"title_text_color":"","title":"The results: a clear winner emerges","text":"The results weren't even close. BMJ Best Practice delivered consistently superior performance across every clinical scenario and evaluation dimension. 
But the real story lies in the details of how each system approached clinical reasoning.\r\n<table class=\"acc-table\" style=\"height: 400;\" border=\"0\" frame=\"box\" rules=\"all\" cellspacing=\"4\" cellpadding=\"4\" align=\"left\">\r\n<thead>\r\n<tr style=\"height: 5px;\">\r\n<th style=\"height: 5px; width: 210px;\"><strong>Performance dimension<\/strong><\/th>\r\n<th style=\"height: 5px; width: 210px;\"><strong>BMJ Best Practice<\/strong><\/th>\r\n<th style=\"height: 5px; width: 210px;\"><strong>Grok 3.0<\/strong><\/th>\r\n<th style=\"height: 5px; width: 210px;\"><strong>ChatGPT 4.0<\/strong><\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr style=\"height: 5px;\">\r\n<td style=\"height: 5px; width: 210px;\"><b>Evidence-based approach<\/b><\/td>\r\n<td style=\"height: 5px; width: 210px;\"><span style=\"font-weight: 400;\">Superior - Extensive current citations<\/span><\/td>\r\n<td style=\"height: 5px; width: 210px;\"><span style=\"font-weight: 400;\">Moderate - Some evidence basis<\/span><\/td>\r\n<td style=\"height: 5px; width: 210px;\"><span style=\"font-weight: 400;\">Limited - Minimal evidence references<\/span><\/td>\r\n<\/tr>\r\n<tr style=\"height: 5px;\">\r\n<td style=\"height: 5px; width: 210px;\"><b>Clinical comprehensiveness<\/b><\/td>\r\n<td style=\"height: 5px; width: 210px;\"><span style=\"font-weight: 400;\">Superior - Comprehensive coverage<\/span><\/td>\r\n<td style=\"height: 5px; width: 210px;\"><span style=\"font-weight: 400;\">Moderate - Adequate coverage<\/span><\/td>\r\n<td style=\"height: 5px; width: 210px;\"><span style=\"font-weight: 400;\">Limited - Basic coverage<\/span><\/td>\r\n<\/tr>\r\n<tr style=\"height: 45px;\">\r\n<td style=\"height: 45px; width: 210px;\"><b>Contextual relevance<\/b><\/td>\r\n<td style=\"height: 45px; width: 210px;\"><span style=\"font-weight: 400;\">Superior - Detailed contextual guidance<\/span><\/td>\r\n<td style=\"height: 45px; width: 210px;\"><span style=\"font-weight: 400;\">Moderate - Some context 
provided<\/span><\/td>\r\n<td style=\"height: 45px; width: 210px;\"><span style=\"font-weight: 400;\">Limited - Minimal contextual adaptation<\/span><\/td>\r\n<\/tr>\r\n<tr style=\"height: 68px;\">\r\n<td style=\"height: 68px; width: 210px;\"><b>Clinical organization<\/b><\/td>\r\n<td style=\"height: 68px; width: 210px;\"><span style=\"font-weight: 400;\">Superior - Clear structured hierarchy<\/span><\/td>\r\n<td style=\"height: 68px; width: 210px;\"><span style=\"font-weight: 400;\">Good - Well-organized<\/span><\/td>\r\n<td style=\"height: 68px; width: 210px;\"><span style=\"font-weight: 400;\">Moderate - Basic organization<\/span><\/td>\r\n<\/tr>\r\n<tr style=\"height: 68px;\">\r\n<td style=\"height: 68px; width: 210px;\"><b>Practical utility<\/b><\/td>\r\n<td style=\"height: 68px; width: 210px;\"><span style=\"font-weight: 400;\">Superior - Actionable clinical guidance<\/span><\/td>\r\n<td style=\"height: 68px; width: 210px;\"><span style=\"font-weight: 400;\">Moderate - Generally practical<\/span><\/td>\r\n<td style=\"height: 68px; width: 210px;\"><span style=\"font-weight: 400;\">Limited - Basic utility<\/span><\/td>\r\n<\/tr>\r\n<tr style=\"height: 68px;\">\r\n<td style=\"height: 68px; width: 210px;\"><b>Patient safety considerations<\/b><\/td>\r\n<td style=\"height: 68px; width: 210px;\"><span style=\"font-weight: 400;\">Superior - Extensive safety guidance<\/span><\/td>\r\n<td style=\"height: 68px; width: 210px;\"><span style=\"font-weight: 400;\">Moderate - Some safety considerations<\/span><\/td>\r\n<td style=\"height: 68px; width: 210px;\"><span style=\"font-weight: 400;\">Limited - Basic safety awareness<\/span><\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n&nbsp;\r\n\r\nBMJ Best Practice consistently provided what clinicians actually need: current evidence citations, clear hierarchies of first-line versus second-line interventions, detailed considerations for complex patients with comorbidities, and explicit safety warnings. 
When discussing pulmonary embolism management, for instance, it provided nuanced guidance for patients with renal impairment, obesity, and cancer\u2014the real-world complexity we face daily.\r\n\r\nThe LLMs, while impressive in their breadth of knowledge, fell short in critical ways. They provided plausible-sounding answers that often lacked the depth and evidence base necessary for clinical decision-making. ChatGPT 4.0 was particularly concerning, offering superficial responses that might satisfy a curious patient but would leave a clinician wanting. Grok 3.0 performed better but still couldn't match the systematic, evidence-based approach of a purpose-built clinical tool.","image":null,"qr_image":null,"circular_image":null,"button_text":"","button_url":"","button2_text":"","button2_url":"","video_embed":"","quote":"","quote_author":"","author_job_title":"","feature_iconsimages":null,"bullets":"none","bullet_text":null,"expiration_date":null,"review_type":"normal","reviews":null,"feature_image_cards":null,"feature_icon_cards":null,"podcast_date":null,"podcast_title":"","podcast_text":"","podcast_embed_script":"","campaign_advert_link":"","monthly":{"show_monthly_tab":false,"strikethrough_price":"","price_text":"","per_month_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"annual":{"show_annual_tab":false,"packages":null,"default_price":"","default_strikethrough_price":"","default_per_year_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"advisory_panel":null,"article_list_fields":null,"subscription_tab_main_heading":"","subscription_tabs":null,"accordion_for_page":null,"embed_code":""},{"choose_component":"generic","hide":"","logged_in":"","logged_out":"","grey_background":"","background_color":"","background_image":null,"title_text_color":"","title":"Implications for modern practice: the difference between plausible and proven","text":"Here's what this means for those of us at the bedside: There's a 
crucial difference between information that sounds right and guidance that is right. Generative AI excels at producing coherent, convincing text, but clinical decision-making requires something more\u2014systematic evidence evaluation, expert clinical judgment, and the kind of rigorous validation processes that ensure patient safety.\r\n\r\nBMJ Best Practice represents years of systematic literature review, expert editorial oversight, and continuous updating by clinical specialists. It's not just a database of medical facts; it's a curated clinical reasoning system designed specifically for healthcare decision-making. The LLMs, impressive as they are, operate fundamentally differently\u2014they're pattern-matching systems trained on diverse text corpora without the clinical validation and evidence hierarchy that healthcare decisions demand.\r\n\r\nThis doesn't mean generative AI has no place in medicine. These tools show remarkable promise for administrative tasks, making patient education materials accessible, and clinical documentation. But when it comes to direct clinical decision support\u2014the moments when patient outcomes hang in the balance\u2014we need systems built specifically for healthcare, with the evidence base and validation processes that patient safety requires.\r\n\r\nThe lesson here isn't to fear AI, but to be discerning consumers of it. As the healthcare landscape evolves, our obligation to patients remains constant: to base our decisions on the strongest available evidence, delivered through systems designed and validated for clinical use. That's not just good medicine\u2014it's the foundation of professional integrity in an age of artificial intelligence.\r\n\r\nIn the end, there's still no substitute for rigorous, evidence-based clinical decision support. 
The AI revolution in healthcare is real, but it's not here yet\u2014at least not for the decisions that matter most.","image":[{"image_field":""}],"qr_image":null,"circular_image":null,"button_text":"","button_url":"","button2_text":"","button2_url":"","video_embed":"","quote":"","quote_author":"","author_job_title":"","feature_iconsimages":null,"bullets":"none","bullet_text":null,"expiration_date":null,"review_type":"normal","reviews":null,"feature_image_cards":null,"feature_icon_cards":null,"podcast_date":null,"podcast_title":"","podcast_text":"","podcast_embed_script":"","campaign_advert_link":"","monthly":{"show_monthly_tab":false,"strikethrough_price":"","price_text":"","per_month_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"annual":{"show_annual_tab":false,"packages":null,"default_price":"","default_strikethrough_price":"","default_per_year_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"advisory_panel":null,"article_list_fields":null,"subscription_tab_main_heading":"","subscription_tabs":null,"accordion_for_page":null,"embed_code":""},{"choose_component":"generic","hide":"","logged_in":"","logged_out":"","grey_background":["yes"],"background_color":"","background_image":null,"title_text_color":"","title":"About the authors","text":"<em>Dr. Blackford Middleton is a leading expert in clinical informatics and healthcare technology. At BMJ Group, he serves as a consultant for Digital Knowledge Products.<\/em>\r\n\r\n<em>Dr. Kieran Walsh is Clinical Director at BMJ Group. He is a general physician with extensive experience in medical education and evidence-based practice.<\/em>\r\n\r\nCompeting interests\r\nDr Blackford Middleton acts as a consultant at BMJ Group. 
Dr Kieran Walsh works for BMJ Group.","image":null,"qr_image":null,"circular_image":15679,"button_text":"","button_url":"","button2_text":"","button2_url":"","video_embed":"","quote":"","quote_author":"","author_job_title":"","feature_iconsimages":null,"bullets":"none","bullet_text":null,"expiration_date":null,"review_type":"normal","reviews":null,"feature_image_cards":null,"feature_icon_cards":null,"podcast_date":null,"podcast_title":"","podcast_text":"","podcast_embed_script":"","campaign_advert_link":"","monthly":{"show_monthly_tab":false,"strikethrough_price":"","price_text":"","per_month_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"annual":{"show_annual_tab":false,"packages":null,"default_price":"","default_strikethrough_price":"","default_per_year_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"advisory_panel":null,"article_list_fields":null,"subscription_tab_main_heading":"","subscription_tabs":null,"accordion_for_page":null,"embed_code":""},{"choose_component":"genericCircular","hide":"","logged_in":"","logged_out":"","grey_background":"","background_color":"","background_image":null,"title_text_color":"","title":"Appendix","text":"<h3>The clinical case questions:<\/h3>\r\n<ul>\r\n \t<li>What tests should I order for a patient with heart failure with reduced ejection fraction?<\/li>\r\n \t<li>How should I treat a patient with acute exacerbation of COPD and diabetes?<\/li>\r\n \t<li>You suspect a patient has multiple sclerosis - what tests should you order?<\/li>\r\n \t<li>What tests should I order for an adult patient with hypertension?<\/li>\r\n \t<li>How should I manage a patient with permanent atrial fibrillation with no contraindications to long-term anticoagulation?<\/li>\r\n \t<li>What tests should I order for a patient with suspected celiac disease?<\/li>\r\n \t<li>What drug management should I advise for a patient with pulmonary embolism with an intermediate-low risk or low risk 
PESI\/sPESI score with no contraindication to anticoagulation?<\/li>\r\n<\/ul>","image":null,"qr_image":null,"circular_image":15679,"button_text":"","button_url":"","button2_text":"","button2_url":"","video_embed":"","quote":"","quote_author":"","author_job_title":"","feature_iconsimages":null,"bullets":"none","bullet_text":null,"expiration_date":null,"review_type":"normal","reviews":null,"feature_image_cards":null,"feature_icon_cards":null,"podcast_date":null,"podcast_title":"","podcast_text":"","podcast_embed_script":"","campaign_advert_link":"","monthly":{"show_monthly_tab":false,"strikethrough_price":"","price_text":"","per_month_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"annual":{"show_annual_tab":false,"packages":null,"default_price":"","default_strikethrough_price":"","default_per_year_text":"","vat_text":"","subscribe_button_text":"","subscribe_button_link":""},"advisory_panel":null,"article_list_fields":null,"subscription_tab_main_heading":"","subscription_tabs":null,"accordion_for_page":null,"embed_code":""}],"sticky_cta_text":"","sticky_cta_link":"","display_content_search":false,"seo_schema":""},"aioseo_notices":[],"_links":{"self":[{"href":"https:\/\/bestpractice.bmj.com\/info\/us\/wp-json\/wp\/v2\/pages\/15757","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/bestpractice.bmj.com\/info\/us\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/bestpractice.bmj.com\/info\/us\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/bestpractice.bmj.com\/info\/us\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/bestpractice.bmj.com\/info\/us\/wp-json\/wp\/v2\/comments?post=15757"}],"version-history":[{"count":0,"href":"https:\/\/bestpractice.bmj.com\/info\/us\/wp-json\/wp\/v2\/pages\/15757\/revisions"}],"wp:attachment":[{"href":"https:\/\/bestpractice.bmj.com\/info\/us\/wp-json\/wp\/v2\/media?parent=15757"}],"curies":[{"name":"wp","href":"https:\/\/ap
i.w.org\/{rel}","templated":true}]}}