flowchart TD
%% Diagram header: a flowchart laid out top-down (TD).
%% Stage 1-2: Initial Data
%% Declares the three starting nodes of the diagram:
%%   USER - the user's molecule input
%%   IND  - the indications the LLM discovers for that molecule
%%   INIT - the initial corpus; its label counts 25 GBQ + 8 FDA = 33 documents
subgraph "Stage 1-2: Initial Corpus"
USER["👤 User Input:
Molecule: Empagliflozin"]
IND["🧠 LLM Discovers Indications:
T2D, HF, CKD"]
INIT["📚 Initial Corpus:
GBQ: 25 docs
FDA: 8 docs"]
end
%% Stage 4: Similar Molecules
%% Declares the corpus-expansion nodes:
%%   SIM     - the similar molecules that were found
%%   EXP1    - documents added by the expansion (45 GBQ + 12 FDA = 57)
%%   CORPUS1 - the expanded corpus; 90 = 33 (INIT label) + 57 (EXP1 label)
subgraph "Stage 4: Auto-Expansion"
SIM["🔄 Similar Molecules Found:
Dapagliflozin, Canagliflozin
Ertugliflozin"]
EXP1["📈 Corpus Expands:
+45 GBQ docs
+12 FDA labels"]
CORPUS1["📚 Expanded Corpus: 90 docs"]
end
%% Stage 5: Geographic Guidelines
%% Declares the guideline-gathering nodes:
%%   GEO   - the five-region search (US, EU, APAC, LATAM, MEA)
%%   WEB1  - web search and scrapers
%%   GAPS  - the step where the user fills gaps
%%   GUIDE - guidelines added: +85 documents (90 + 85 = 175 running total)
subgraph "Stage 5: Geographic Guidelines"
GEO["🌍 5-Region Search:
US • EU • APAC • LATAM • MEA"]
WEB1["🔍 Web Search + Scrapers"]
GAPS["📤 User Fills Gaps"]
GUIDE["📋 Guidelines Added:
+85 documents"]
end
%% Stage 6: Clinical Studies
%% Declares the clinical-study nodes:
%%   CLIN - web search for clinical studies
%%   CAT  - AI categorization (Phase I/II/III, RWE, Safety, Efficacy)
%%   SEL  - user selection
%%   STUD - studies added: +67 documents (175 + 67 = 242 running total)
subgraph "Stage 6: Clinical Studies"
CLIN["📊 Web Search Clinical Studies"]
CAT["📑 AI Categorization:
Phase I/II/III • RWE
Safety • Efficacy"]
SEL["✅ User Selection"]
STUD["📈 Studies Added:
+67 documents"]
end
%% Final Corpus
%% Declares FINAL_CORPUS, the single node summarising the finished corpus.
%% Its 242-document figure equals the sum of the counts shown in the stage
%% subgraphs: 33 (INIT) + 57 (EXP1) + 85 (GUIDE) + 67 (STUD).
subgraph "Complete Corpus"
FINAL_CORPUS["📚 Final Corpus:
242 Documents • 2.8 GB
Multi-Source • Multi-Format"]
end
%% Document Processing
%% Declares the processing hub (PROC), its four extraction nodes
%% (TEXT, TABLES, IMAGES, META) and the resulting structured corpus (CORPUS).
subgraph "Processing Phase"
PROC["🔄 Multi-Method Processing"]
TEXT["📝 Text Extraction"]
TABLES["📊 Table Data: 156 tables"]
IMAGES["🖼️ Image Analysis: 89 figures"]
META["🏷️ Metadata: Citations, Sources"]
CORPUS["📚 Structured Corpus
Ready for Analysis"]
end
%% AI Analysis
%% Declares the AGENTS hub plus two nested subgraphs of three nodes each,
%% which together make up the six agents named in the AGENTS label.
subgraph "AI Analysis Phase"
AGENTS["🤖 6 Specialized Agents"]
%% Clinical Track: efficacy (EFF), safety (SAF), evidence synthesis (EVI).
subgraph "Clinical Track"
EFF["💊 Efficacy Data:
Primary endpoints
Secondary outcomes
Subgroup analyses"]
SAF["⚠️ Safety Data:
Adverse events
Laboratory values
Drug interactions"]
EVI["📈 Evidence Synthesis:
Meta-analyses
Real-world data
Quality assessment"]
end
%% Strategic Track: competitive (COMP), regulatory (REG), market access (MKT).
subgraph "Strategic Track"
COMP["⚖️ Competitive Analysis:
Market positioning
Head-to-head comparisons
Pricing strategies"]
REG["🏛️ Regulatory Status:
Approval timelines
Label differences
Guidelines positioning"]
MKT["💼 Market Access:
Reimbursement status
Health economics
Payer perspectives"]
end
end
%% Section Generation
%% Declares the INDEP hub and six report-section nodes (SEC1..SEC6).
%% Totals across the six section labels:
%%   pages:     12 + 8 + 10 + 15 + 9 + 7  = 61  (the figure in the MAIN label)
%%   citations: 45 + 32 + 28 + 67 + 38 + 21 = 231 (the figure in the CITE label)
subgraph "Section Generation"
INDEP["🔒 Independent Sections"]
SEC1["📋 Efficacy Section
12 pages, 45 citations"]
SEC2["📋 Safety Section
8 pages, 32 citations"]
SEC3["📋 Competitive Section
10 pages, 28 citations"]
SEC4["📋 Regulatory Section
15 pages, 67 citations"]
SEC5["📋 Evidence Section
9 pages, 38 citations"]
SEC6["📋 Market Section
7 pages, 21 citations"]
end
%% Quality Assurance
%% Declares the QA hub and its four check nodes: citation check (CITE),
%% medical review (MED), compliance check (COMP2) and confidence score (CONF).
%% COMP2 is a separate node ID from COMP, which is declared in the
%% "Strategic Track" subgraph.
subgraph "Quality Assurance"
QA["✅ Quality Validation"]
CITE["🔍 Citation Check:
231 of 231 verified"]
MED["🩺 Medical Review:
Accuracy score: 96%"]
COMP2["📊 Compliance Check:
All requirements met"]
CONF["📈 Confidence Score: 94%"]
end
%% Report Assembly
%% Declares the STITCH hub and the four report parts it produces:
%% executive summary (EXEC), table of contents (TOC), main report (MAIN)
%% and appendices (APPEND). MAIN's 61 pages equals the sum of the page
%% counts in the SEC1..SEC6 labels.
subgraph "Report Assembly"
STITCH["🧩 Intelligent Stitching"]
EXEC["📄 Executive Summary
2 pages"]
TOC["📑 Table of Contents"]
MAIN["📖 Main Report
61 pages total"]
APPEND["📎 Appendices
Data tables, references"]
end
%% Final Output
%% Declares the FINAL deliverables hub and the three output formats:
%% PDF report (PDF), Excel workbook (EXCEL) and interactive dashboard (DASH).
subgraph "Output Layer"
FINAL["📤 Final Deliverables"]
PDF["📄 PDF Report
Professional format"]
EXCEL["📊 Excel Workbook
Data tables and charts"]
DASH["🖥️ Interactive Dashboard
Web-based exploration"]
end
%% Flow Connections
%% Corpus building. Every edge below uses a node ID that is declared in one of
%% the subgraphs; referencing an undeclared ID makes Mermaid create a bare,
%% unlabeled node outside every subgraph.
%% Stage 1-2: user input -> discovered indications -> initial corpus
USER --> IND
IND --> INIT
%% Stage 4: similar molecules expand the corpus
INIT --> SIM
SIM --> EXP1
EXP1 --> CORPUS1
%% Stage 5: regional guideline search, with the user filling gaps
CORPUS1 --> GEO
GEO --> WEB1
WEB1 --> GAPS
GAPS --> GUIDE
%% Stage 6: clinical studies are searched, categorized, selected and added
GUIDE --> CLIN
CLIN --> CAT
CAT --> SEL
SEL --> STUD
%% The complete corpus feeds document processing
STUD --> FINAL_CORPUS
FINAL_CORPUS --> PROC
%% Processing: PROC fans out to the four extractors, which converge on CORPUS.
PROC --> TEXT & TABLES & IMAGES & META
TEXT & TABLES & IMAGES & META --> CORPUS
%% Analysis: the structured corpus feeds AGENTS, which fans out to all six agents.
CORPUS --> AGENTS
AGENTS --> EFF & SAF & EVI & COMP & REG & MKT
%% Section generation: every agent feeds INDEP, which fans out to the six sections.
EFF & SAF & EVI & COMP & REG & MKT --> INDEP
INDEP --> SEC1 & SEC2 & SEC3 & SEC4 & SEC5 & SEC6
%% Quality assurance: every section feeds QA, which fans out to the four checks.
SEC1 & SEC2 & SEC3 & SEC4 & SEC5 & SEC6 --> QA
QA --> CITE & MED & COMP2 & CONF
%% Assembly: every check feeds STITCH, which fans out to the four report parts.
CITE & MED & COMP2 & CONF --> STITCH
STITCH --> EXEC & TOC & MAIN & APPEND
%% Output: every report part feeds FINAL, which fans out to the three deliverables.
EXEC & TOC & MAIN & APPEND --> FINAL
FINAL --> PDF & EXCEL & DASH
%% Styling
%% One classDef per pipeline phase, then one `class` statement assigning that
%% phase's nodes to it. Every ID listed must match a node declared in a
%% subgraph; a `class` statement naming an undeclared ID styles nothing.
classDef input fill:#e8f5e8,stroke:#2e7d32,stroke-width:3px
classDef collection fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
classDef processing fill:#fff3e0,stroke:#f57c00,stroke-width:2px
classDef analysis fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
classDef sections fill:#e0f2f1,stroke:#00695c,stroke-width:2px
classDef quality fill:#f1f8e9,stroke:#558b2f,stroke-width:2px
classDef assembly fill:#fce4ec,stroke:#c2185b,stroke-width:2px
classDef output fill:#e8f5e8,stroke:#388e3c,stroke-width:3px
class USER input
%% Corpus-building nodes: everything declared in the "Stage 1-2" through
%% "Complete Corpus" subgraphs except USER, which is styled as input above.
class IND,INIT,SIM,EXP1,CORPUS1,GEO,WEB1,GAPS,GUIDE,CLIN,CAT,SEL,STUD,FINAL_CORPUS collection
class PROC,TEXT,TABLES,IMAGES,META,CORPUS processing
class AGENTS,EFF,SAF,EVI,COMP,REG,MKT analysis
class INDEP,SEC1,SEC2,SEC3,SEC4,SEC5,SEC6 sections
class QA,CITE,MED,COMP2,CONF quality
class STITCH,EXEC,TOC,MAIN,APPEND assembly
class FINAL,PDF,EXCEL,DASH output