; ============================================================================= ; VICIdial Multi-Server Cluster — Load Balancing, Replication & Shared Storage ; ============================================================================= ; Generated by ViciStack — https://vicistack.com — Free VICIdial optimization ; ; WHAT THIS FILE DOES: ; Configuration for a production VICIdial cluster: database replication, ; dialer server separation, web server load balancing, NFS shared storage ; for recordings, and Kamailio SIP load balancing. This is the setup for ; 50+ agent deployments where a single server isn't enough. ; ; ARCHITECTURE: ; ┌─────────────┐ ┌─────────────┐ ; │ Web/Admin │ │ Web/Admin │ ; │ Server 1 │ │ Server 2 │ (Apache + PHP, agent interface) ; └──────┬───────┘ └──────┬───────┘ ; │ │ ; ┌──────┴────────────────────┴───────┐ ; │ HAProxy / Nginx │ (HTTP load balancer) ; └──────┬────────────────────┬───────┘ ; │ │ ; ┌──────┴───────┐ ┌─────┴────────┐ ; │ Dialer/ │ │ Dialer/ │ ; │ Asterisk 1 │ │ Asterisk 2 │ (Asterisk + VICIdial dialer) ; └──────┬───────┘ └──────┬───────┘ ; │ │ ; ┌──────┴────────────────────┴───────┐ ; │ Kamailio SIP Proxy │ (SIP load balancing + failover) ; └──────┬────────────────────┬───────┘ ; │ │ ; ┌──────┴───────┐ ┌─────┴────────┐ ; │ MySQL │────▶│ MySQL │ ; │ Primary │ │ Replica │ (Master-slave replication) ; └──────────────┘ └──────────────┘ ; │ ; ┌──────┴───────┐ ; │ NFS Server │ (Shared recordings + sounds) ; └──────────────┘ ; ; FULL GUIDE: ; https://vicistack.com/blog/vicidial-cluster-guide/ ; https://vicistack.com/blog/vicidial-kamailio-load-balancing/ ; https://vicistack.com/blog/vicidial-database-partitioning/ ; https://vicistack.com/blog/vicidial-mysql-optimization/ ; https://vicistack.com/blog/vicidial-disaster-recovery/ ; ; NEED HELP? 
hello@vicistack.com ; ============================================================================= ; ============================================================================= ; SECTION 1: MYSQL DATABASE REPLICATION ; ============================================================================= ; VICIdial's database is the backbone. Every call, every lead, every agent ; action touches MySQL. A single DB server becomes the bottleneck around ; 80-100 concurrent agents. Replication splits read queries to the replica, ; and provides a failover target if the primary dies. ; ; Architecture: Primary (read-write) → Replica (read-only) ; VICIdial reads (reports, agent screens) go to replica. ; VICIdial writes (call logs, dispositions) go to primary. ; ============================================================================= ; --- Primary Server: /etc/my.cnf --- ; Add under [mysqld]: ; [mysqld] ; server-id = 1 ; ; Unique ID for this server in the replication topology. ; ; Primary = 1, Replica = 2, etc. Must be unique. ; ; log-bin = mysql-bin ; ; Enable binary logging. This is the replication stream — every write ; ; on the primary gets logged here and sent to the replica. ; ; binlog-format = ROW ; ; ROW format is safer than STATEMENT for VICIdial. STATEMENT can cause ; ; replication drift on non-deterministic queries (which VICIdial has). ; ; binlog-expire-logs-seconds = 259200 ; ; Keep binary logs for 3 days (259200 seconds). Long enough to recover ; ; from a replica outage, short enough to not fill your disk. ; ; sync-binlog = 1 ; ; Sync binary log to disk on every commit. Prevents data loss on crash. ; ; Small performance hit (~5%) but worth it for data integrity. ; ; innodb-flush-log-at-trx-commit = 1 ; ; Flush InnoDB log on every transaction commit. Combined with sync-binlog, ; ; this gives you ACID compliance and crash-safe replication. ; ; ; --- Performance tuning for VICIdial --- ; innodb-buffer-pool-size = 8G ; ; Set to 60-70% of total RAM on the DB server. 
This is the single most ; ; impactful MySQL performance setting. VICIdial's vicidial_log and ; ; vicidial_closer_log tables are hammered constantly — they need to ; ; fit in the buffer pool. ; ; 4GB RAM server → 2.5G. 16GB server → 10G. 32GB server → 20G. ; ; innodb-log-file-size = 1G ; ; Larger log files = fewer checkpoints = better write performance. ; ; 1G is good for 50-200 agents. 2G for 200+ agents. ; ; innodb-io-capacity = 2000 ; ; Tells InnoDB how fast your storage is. HDD: 200. SSD: 2000. NVMe: 10000. ; ; If your DB is on spinning rust, this is the second thing to fix (after ; ; getting an SSD). ; ; max-connections = 500 ; ; VICIdial opens a lot of DB connections — each web page, each cron process, ; ; each dial attempt. 500 handles a 200-agent cluster. Default 151 is too low. ; ; ; See: https://vicistack.com/blog/vicidial-mysql-optimization/ ; --- Replica Server: /etc/my.cnf --- ; Add under [mysqld]: ; [mysqld] ; server-id = 2 ; relay-log = relay-bin ; read-only = 1 ; ; Prevent accidental writes to the replica. ; ; VICIdial should only send reads here. ; ; innodb-buffer-pool-size = 8G ; ; Same tuning as primary. The replica handles all report queries, ; ; which can be heavier than writes. ; ; ; Connect replica to primary: ; ; mysql> CHANGE MASTER TO ; ; MASTER_HOST='PRIMARY_IP', ; ; MASTER_USER='replication_user', ; ; MASTER_PASSWORD='STRONG_PASSWORD', ; ; MASTER_LOG_FILE='mysql-bin.000001', ; ; MASTER_LOG_POS=4; ; ; Use the real coordinates reported by SHOW MASTER STATUS on the primary — ; ; binlog positions start at 4 (the file header), so 0 is not a valid offset. ; ; mysql> START SLAVE; ; ; mysql> SHOW SLAVE STATUS\G ; ; Look for: Slave_IO_Running: Yes, Slave_SQL_Running: Yes ; ; Seconds_Behind_Master should be 0 (or close to 0).
; --- Replication User (create on primary) --- ; CREATE USER 'replication_user'@'REPLICA_IP' IDENTIFIED BY 'STRONG_PASSWORD'; ; GRANT REPLICATION SLAVE ON *.* TO 'replication_user'@'REPLICA_IP'; ; FLUSH PRIVILEGES; ; ============================================================================= ; SECTION 2: VICIDIAL SERVER CONFIGURATION ; ============================================================================= ; Each VICIdial server has /etc/astguiclient.conf. This tells it what role ; to play in the cluster: web, dialer, or both. ; ============================================================================= ; --- Dialer server: /etc/astguiclient.conf --- ; VARserver_ip=DIALER_1_IP ; ; This server's IP. Must match what's in the VICIdial servers table. ; ; VARDB_server=PRIMARY_DB_IP ; ; Point to the MySQL primary for writes. ; ; VARDB_port=3306 ; VARDB_user=cron ; VARDB_pass=YOUR_DB_PASSWORD ; VARDB_database=asterisk ; ; VARactive_keepalives=123456 ; ; Which VICIdial cron processes to run on this server. ; ; 1 = AST_update (Asterisk channel monitor) ; ; 2 = AST_send_listen (send calls) ; ; 3 = AST_VDauto_dial (predictive dialer engine — THE IMPORTANT ONE) ; ; 4 = AST_VDremote_agents ; ; 5 = AST_VDadapt (adaptive dial level) ; ; 6 = FastAGI server ; ; ; ; On a dialer-only server: VARactive_keepalives=123456 ; ; On a web-only server: VARactive_keepalives= (empty — no cron processes) ; ; VARfastagi_log_min_servers=3 ; VARfastagi_log_max_servers=16 ; ; FastAGI server count. Scale with agent count. ; ; 3-16 handles up to 100 agents per dialer. Add more for larger deployments. ; --- Web server: /etc/astguiclient.conf --- ; VARserver_ip=WEB_1_IP ; VARDB_server=REPLICA_DB_IP ; ; Point web servers at the READ REPLICA for all those report queries. ; ; Reduces load on the primary database. 
; ; ; ; EXCEPTION: Certain VICIdial pages MUST talk to the primary: ; ; - Agent screen (writes dispositions, call logs) ; ; - Admin changes (campaign settings, user changes) ; ; VICIdial 3.14+ handles this with the $VARDB_server_write setting. ; ; VARDB_server_write=PRIMARY_DB_IP ; ; Writes go to primary, reads go to replica — read/write splitting done ; ; right. (Not "split-brain": that term means a replication failure where ; ; both nodes accept writes, which is exactly what this setup prevents.) ; ; VARactive_keepalives= ; ; Empty — web servers don't run dialer crons. ; --- VICIdial Admin: Servers Table --- ; Admin > Admin > Servers ; Add each server with its role: ; Dialer 1: IP=DIALER_1_IP, Active=Y, Active Asterisk Server=Y ; Dialer 2: IP=DIALER_2_IP, Active=Y, Active Asterisk Server=Y ; Web 1: IP=WEB_1_IP, Active=Y, Active Asterisk Server=N ; Web 2: IP=WEB_2_IP, Active=Y, Active Asterisk Server=N ; ============================================================================= ; SECTION 3: NFS SHARED STORAGE FOR RECORDINGS ; ============================================================================= ; All servers need access to the same recordings. NFS provides a shared ; filesystem. The recording lands on whichever dialer handled the call, ; and NFS makes it accessible from any web server for playback.
; ============================================================================= ; --- NFS Server Setup --- ; Install: yum install nfs-utils ; ; Create the shared directory: ; mkdir -p /recordings/monitor ; chown asterisk:asterisk /recordings/monitor ; ; --- /etc/exports (NFS server) --- ; /recordings DIALER_1_IP(rw,sync,no_subtree_check,no_root_squash) ; /recordings DIALER_2_IP(rw,sync,no_subtree_check,no_root_squash) ; /recordings WEB_1_IP(ro,sync,no_subtree_check,no_root_squash) ; /recordings WEB_2_IP(ro,sync,no_subtree_check,no_root_squash) ; ; Dialer servers: rw (read-write, they create recordings) ; Web servers: ro (read-only, they just play back recordings) ; ; Apply: exportfs -ra && systemctl restart nfs-server ; ; --- NFS Client Setup (on each VICIdial server) --- ; yum install nfs-utils ; ; --- /etc/fstab (on each client) --- ; NFS_SERVER_IP:/recordings /var/spool/asterisk/monitor nfs rw,soft,intr,timeo=30,retrans=3 0 0 ; ; Mount options explained: ; rw = read-write (change to ro for web servers) ; soft = return error if NFS is down (vs. hanging forever) ; (caution: soft mounts can drop in-flight writes on timeout — an ; acceptable trade-off for recordings; use hard for data you cannot lose) ; intr = ignored since Linux kernel 2.6.25; harmless to keep for old kernels ; timeo = timeout in deciseconds (30 = 3 seconds) ; retrans = retry count before giving up ; ; Mount: mount -a ; Verify: df -h /var/spool/asterisk/monitor ; ; PERFORMANCE NOTE: NFS over gigabit ethernet is fine for recordings. ; If you have 200+ agents and notice recording write delays, consider: ; 1. 10 Gigabit NFS ; 2. GlusterFS (distributed, no single NFS server) ; 3. Write recordings locally, then rsync to shared storage via cron ; ============================================================================= ; SECTION 4: KAMAILIO SIP LOAD BALANCER ; ============================================================================= ; Kamailio distributes SIP traffic across your Asterisk servers. ; If one Asterisk dies, Kamailio routes calls to the surviving one.
; ; See: https://vicistack.com/blog/vicidial-kamailio-load-balancing/ ; ============================================================================= ; --- /etc/kamailio/kamailio.cfg (simplified) --- ; ; # Load dispatcher module for load balancing ; loadmodule "dispatcher.so" ; modparam("dispatcher", "db_url", "mysql://kamailio:PASS@PRIMARY_DB_IP/kamailio") ; modparam("dispatcher", "ds_ping_interval", 10) ; ; Ping each Asterisk server every 10 seconds to check if it's alive. ; modparam("dispatcher", "ds_probing_mode", 1) ; ; Probing mode 1 = always probe, even if the server seems healthy. ; ; Catches "zombie" Asterisk processes that accept connections but don't ; ; handle calls. ; modparam("dispatcher", "ds_ping_reply_codes", "class=2") ; ; Accept any 2xx response as "healthy." (Note the "class=2" syntax — ; ; the dispatcher module expects class=/code= pairs here.) ; ; # Dispatcher routing ; route[DISPATCH] { ; if (!ds_select_dst("1", "4")) { ; # "1" = dispatcher set (group of Asterisk servers) ; # "4" = round-robin algorithm ; # Other algorithms: ; # 0 = hash over callid (sticky sessions — same caller always same server) ; # 4 = round-robin (distribute evenly) ; # 9 = weight-based (route more to beefier servers) ; send_reply("503", "All dialers down"); ; exit; ; } ; route(RELAY); ; } ; ; # --- Dispatcher sets (/etc/kamailio/dispatcher.list) --- ; # (use EITHER the db_url modparam above OR this flat file as the ; # destination source — not both at once) ; # setid destination flags priority attributes ; 1 sip:DIALER_1_IP:5060 0 0 weight=50 ; 1 sip:DIALER_2_IP:5060 0 0 weight=50 ; ; ; ; Equal weight = equal distribution. If Dialer 1 is beefier, give it ; ; weight=70 and Dialer 2 weight=30. ; ============================================================================= ; SECTION 5: HTTP LOAD BALANCER (HAPROXY) ; ============================================================================= ; HAProxy distributes agent web interface traffic across web servers. ; If Web Server 1 dies, agents automatically get routed to Web Server 2.
; ============================================================================= ; --- /etc/haproxy/haproxy.cfg --- ; global ; log /dev/log local0 ; chroot /var/lib/haproxy ; stats socket /run/haproxy/admin.sock mode 660 level admin ; maxconn 4096 ; user haproxy ; group haproxy ; ; defaults ; log global ; mode http ; option httplog ; option dontlognull ; timeout connect 5000ms ; timeout client 50000ms ; timeout server 50000ms ; ; ; Agent sticky sessions — CRITICAL for VICIdial. ; ; Once an agent logs in to Web Server 1, ALL their requests must go to ; ; Server 1. VICIdial stores session state in PHP sessions, not in the ; ; database. If you round-robin agent requests, their session breaks and ; ; they get logged out mid-call. ; ; frontend https_front ; bind *:443 ssl crt /etc/haproxy/certs/vicidial.pem ; default_backend web_servers ; ; backend web_servers ; balance source ; ; "source" = sticky by client IP. Same agent always hits same server. ; ; Alternative: cookie-based stickiness (more reliable if agents share IP): ; ; balance roundrobin ; ; cookie SERVERID insert indirect nocache ; ; server web1 WEB_1_IP:443 ssl cookie web1 check ; ; server web2 WEB_2_IP:443 ssl cookie web2 check ; ; option httpchk GET /vicidial/welcome.php ; ; Health check: hit a light VICIdial page. If it returns 200, server is up. ; http-check expect status 200 ; ; server web1 WEB_1_IP:443 ssl check inter 5000 ; server web2 WEB_2_IP:443 ssl check inter 5000 ; ; inter 5000 = check every 5 seconds. ; ; ; Stats page (optional but useful for monitoring) ; listen stats ; bind *:8404 ; stats enable ; stats uri /haproxy-stats ; stats realm HAProxy\ Statistics ; stats auth admin:STRONG_PASSWORD ; ============================================================================= ; SECTION 6: CLUSTER MONITORING ; ============================================================================= ; What to watch to make sure the cluster is healthy. ; ; 1. 
REPLICATION LAG ; On replica: SHOW SLAVE STATUS\G → Seconds_Behind_Master ; Alert if > 5 seconds. Investigate if > 30 seconds. ; Common cause: heavy report queries on replica slowing SQL thread. ; ; 2. ASTERISK CHANNEL COUNT ; asterisk -rx "core show channels count" ; Compare across dialer servers — should be roughly equal if Kamailio ; is load-balancing correctly. ; ; 3. NFS MOUNT STATUS ; df -h /var/spool/asterisk/monitor ; Alert if NFS mount disappears. Recordings will fail silently. ; Add to monitoring: mount | grep nfs | wc -l (should be > 0) ; ; 4. HAPROXY BACKEND STATUS ; http://LB_IP:8404/haproxy-stats ; All backends should show green/UP. Red = server down. ; ; See: https://vicistack.com/blog/vicidial-grafana-realtime-dashboard/ ; https://vicistack.com/blog/vicidial-reporting-monitoring/ ; ; For cluster architecture design and implementation: ; https://vicistack.com — hello@vicistack.com ; =============================================================================