-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.yaml.example
More file actions
121 lines (107 loc) · 3.67 KB
/
config.yaml.example
File metadata and controls
121 lines (107 loc) · 3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
---
# Oracle Database 26ai Configuration
oracle:
  username: "rag_user"
  password: "${ORACLE_PASSWORD}"  # Can use env vars
  dsn: "oracle_host:1521/orcl"
  use_tls: true
  tls_wallet_path: null  # Optional wallet directory; null for walletless TLS
  pool_size: 10  # Connection pool size
# Ollama Configuration
ollama:
  endpoint: "http://localhost:11434"  # API endpoint URL
  auto_detect_models: true  # Auto-refresh model list on startup
  embedding_model: "nomic-embed-text"  # Embedding model name
  chat_model: "qwen3.5:35b-a3b"  # Chat model name (MoE: 35B total / 3B active)
  fallback_embedding_models:  # Fallback options if primary unavailable
    - "all-minilm"
    - "mxbai-embed-large"
  timeout: 30  # API timeout in seconds
# Document Processing
documents:
  chunk_size: 1000  # Default: 1000 tokens per chunk
  chunk_overlap_percentage: 10  # Default: 10% overlap
  supported_formats:  # TXT, MD, PDF
    - txt
    - md
    - pdf
  max_file_size_mb: 100
  temp_dir: "./temp"
# Vector Index Configuration
vector_index:
  auto_select: true  # Auto-select index type based on data size
  index_type: "HNSW"  # Options: HNSW, IVF_FLAT, HYBRID
  dimension: 768  # Embedding dimension
  m: 16  # HNSW parameter: connections per node
  ef_construction: 200  # HNSW parameter: construction effort
  # Oracle in-database embeddings using langchain-oracledb
  use_oracle_embeddings: false  # Set true to use OracleEmbeddings instead of Ollama
  oracle_embedding_params:  # Params when use_oracle_embeddings is true
    provider: "database"  # database, ocigenai, huggingface, openai, etc.
    model: "ALL_MINILM_L12_V2"  # ONNX model loaded in Oracle DB
# RAG Query Configuration
rag:
  top_k: 5  # Number of documents to retrieve
  min_similarity_score: 0.5  # Minimum similarity threshold
  use_reranking: false  # Future: LLM-based reranking
# Logging Configuration
logging:
  level: "INFO"  # DEBUG, INFO, WARNING, ERROR
  log_file: "./logs/ragcli.log"
  max_log_size_mb: 50
  backup_count: 5  # Rotated log files to keep
  detailed_metrics: true  # Log detailed metrics
# API Configuration (AnythingLLM Integration)
api:
  host: "0.0.0.0"
  port: 8000
  cors_origins:
    - "*"  # Or specific AnythingLLM URL for security
  enable_swagger: true  # Enable API documentation at /docs
# Application Settings
app:
  app_name: "ragcli"
  version: "1.0.0"  # Quoted so it stays a string, not a float
  debug: false
# Search Configuration
search:
  strategy: "hybrid"  # hybrid | vector_only | bm25_only
  rrf_k: 60  # RRF (reciprocal rank fusion) constant
  weights:  # Relative signal weights when fusing result lists
    bm25: 1.0
    vector: 1.0
    graph: 0.8
  use_router: true  # Auto-select signals per query
# Session Memory
memory:
  session_timeout_min: 30
  max_recent_turns: 5  # Verbatim turns kept before summarization
  summarize_every: 5  # Summarize after every N turns
  max_summary_tokens: 300
# Knowledge Graph
knowledge_graph:
  enabled: true
  extraction_model: null  # null = use chat_model
  max_entities_per_chunk: 10
  dedup_threshold: 0.9  # Entity dedup similarity threshold
  max_hops: 2  # Graph traversal depth
# Feedback
feedback:
  enabled: true
  quality_boost_range: 0.15  # Max boost/penalty from chunk quality
  recalibrate_after: 50  # Recalibrate weights after N feedback events
# Evaluation
evaluation:
  pairs_per_chunk: 2  # QA pairs generated per chunk
  max_chunks_per_doc: 20
  live_scoring: false  # Score every query in real-time
# Live Sync
sync:
  enabled: false
  default_poll_interval: 300  # Seconds between polls
  debounce_seconds: 2
  max_file_size_mb: 50
  supported_formats:
    - pdf
    - md
    - txt
    - rst