Master KVM Production Deployment: Real-World Ops Guide & Automation Scripts
This comprehensive guide walks you through KVM virtualization platform deployment in production, covering host preparation, VM creation, advanced networking, storage pool management, performance tuning, monitoring, and automated operational scripts to build a stable and efficient virtualized environment.
KVM Virtualization Platform Production Deployment and Operations
KVM (Kernel-based Virtual Machine) is a core Linux kernel virtualization technology widely used in enterprise environments. Mastering its deployment, management, and optimization is essential for building reliable virtualized platforms.
1. KVM Environment Initialization and Configuration
1.1 Host Environment Preparation
KVM requires CPUs with hardware virtualization extensions (Intel VT‑x or AMD‑V). Advanced settings such as NUMA optimization and CPU affinity should be considered for production workloads.
#!/bin/bash
# KVM host environment initialization script
# Stop at the first command that exits non-zero outside a conditional, so a
# failed setup stage prevents the later stages from running.
set -e
#######################################
# Verify that the host CPU advertises hardware virtualization and report the
# nested-virtualization setting of whichever KVM vendor module is loaded.
# Globals:   none
# Arguments: none
# Outputs:   status lines on stdout
# Returns:   0 when a vmx/svm flag is present; otherwise exits the script
#            with status 1
#######################################
check_virtualization_support() {
  local param nested_virt

  echo "Checking hardware virtualization support..."
  # vmx = Intel VT-x, svm = AMD-V. -w matches whole flags only.
  if grep -Eqw 'vmx|svm' /proc/cpuinfo; then
    echo "✓ CPU supports hardware virtualization"
  else
    echo "✗ CPU does not support hardware virtualization"
    exit 1
  fi

  # The "nested" parameter is exposed by kvm_intel on Intel hosts and by
  # kvm_amd on AMD hosts; report whichever one exists.
  for param in /sys/module/kvm_intel/parameters/nested \
    /sys/module/kvm_amd/parameters/nested; do
    if [[ -r "$param" ]]; then
      nested_virt=$(<"$param")
      echo "Nested virtualization status: $nested_virt"
    fi
  done
}
#######################################
# Install the KVM/libvirt packages for the detected distribution family,
# enable and start libvirtd, then confirm that virsh can talk to it.
# Globals:   none
# Arguments: none
# Outputs:   progress and result lines on stdout
# Returns:   0 on success; exits the script with status 1 when the
#            distribution is unsupported or any step fails
#######################################
install_kvm_packages() {
  echo "Installing KVM packages..."

  if [[ -f /etc/redhat-release ]]; then
    if ! { yum groupinstall -y "Virtualization Host" &&
      yum install -y qemu-kvm libvirt libvirt-python libguestfs-tools virt-install; }; then
      echo "✗ KVM environment installation failed"
      exit 1
    fi
  elif [[ -f /etc/debian_version ]]; then
    if ! { apt-get update &&
      apt-get install -y qemu-kvm libvirt-daemon-system libvirt-clients bridge-utils; }; then
      echo "✗ KVM environment installation failed"
      exit 1
    fi
  else
    # Previously this case fell through and tried to start a libvirtd that
    # had never been installed.
    echo "✗ Unsupported distribution: neither /etc/redhat-release nor /etc/debian_version found"
    exit 1
  fi

  if ! { systemctl enable libvirtd && systemctl start libvirtd; }; then
    echo "✗ KVM environment installation failed"
    exit 1
  fi

  if virsh version >/dev/null 2>&1; then
    echo "✓ KVM environment installed successfully"
  else
    echo "✗ KVM environment installation failed"
    exit 1
  fi
}
#######################################
# Write ifcfg files that enslave a physical NIC to a static-IP bridge,
# restart networking and confirm that the bridge exists.
# Globals:   NETWORK_SCRIPTS_DIR (read, optional) - directory for the ifcfg
#            files; defaults to /etc/sysconfig/network-scripts
# Arguments: all optional, defaults reproduce the previous fixed values
#            $1 - bridge name            (default br0)
#            $2 - physical interface     (default eth0)
#            $3 - bridge IP address      (default 192.168.1.20)
#            $4 - netmask                (default 255.255.255.0)
#            $5 - gateway                (default 192.168.1.1)
#            $6 - DNS server             (default 8.8.8.8)
# Outputs:   progress and result lines on stdout
# Returns:   0 when the bridge is present afterwards, 1 otherwise
#######################################
configure_network_bridge() {
  local bridge=${1:-br0}
  local nic=${2:-eth0}
  local ip_addr=${3:-192.168.1.20}
  local netmask=${4:-255.255.255.0}
  local gateway=${5:-192.168.1.1}
  local dns=${6:-8.8.8.8}
  local cfg_dir=${NETWORK_SCRIPTS_DIR:-/etc/sysconfig/network-scripts}
  local bridges

  echo "Configuring network bridge..."
  if [[ ! -d "$cfg_dir" ]]; then
    echo "✗ Network bridge configuration failed: $cfg_dir does not exist"
    return 1
  fi

  cat >"$cfg_dir/ifcfg-$bridge" <<EOF
DEVICE=$bridge
TYPE=Bridge
BOOTPROTO=static
IPADDR=$ip_addr
NETMASK=$netmask
GATEWAY=$gateway
DNS1=$dns
ONBOOT=yes
DELAY=0
EOF

  cat >"$cfg_dir/ifcfg-$nic" <<EOF
DEVICE=$nic
TYPE=Ethernet
BOOTPROTO=none
ONBOOT=yes
BRIDGE=$bridge
EOF

  if ! systemctl restart network; then
    echo "✗ Network bridge configuration failed"
    return 1
  fi

  # Capture the listing first so grep's early exit cannot kill brctl, and
  # match the bridge as a whole word so "br0" does not match "br01".
  bridges=$(brctl show 2>/dev/null)
  if grep -qw -- "$bridge" <<<"$bridges"; then
    echo "✓ Network bridge configured successfully"
  else
    echo "✗ Network bridge configuration failed"
    return 1
  fi
}
#######################################
# Apply the KVM host sysctl tuning and switch transparent huge pages off.
# The settings are written to a dedicated drop-in file that is overwritten on
# every run, so repeated runs no longer append duplicate blocks to
# /etc/sysctl.conf.
# Globals:   none
# Arguments: none
# Outputs:   sysctl's report of the applied values and a result line
# Returns:   0 on success, 1 when the settings cannot be written or loaded
#######################################
optimize_kernel_parameters() {
  local sysctl_conf=/etc/sysctl.d/99-kvm-tuning.conf
  local thp_dir=/sys/kernel/mm/transparent_hugepage
  local knob

  echo "Optimizing kernel parameters..."
  mkdir -p -- "${sysctl_conf%/*}" || return 1
  cat >"$sysctl_conf" <<EOF || return 1
# KVM optimization
vm.swappiness = 10
vm.dirty_ratio = 5
vm.dirty_background_ratio = 2
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 87380 16777216
net.ipv4.tcp_wmem = 4096 65536 16777216
kernel.numa_balancing = 0
EOF
  sysctl -p "$sysctl_conf" || return 1

  # NOTE(review): the original wrote "never" to `enabled` twice; the second
  # write is presumed to have been meant for `defrag` - confirm.
  # These writes only affect the running kernel and are lost on reboot.
  for knob in enabled defrag; do
    if [[ -w "$thp_dir/$knob" ]]; then
      echo never >"$thp_dir/$knob"
    fi
  done

  echo "✓ Kernel parameters optimized"
}
#######################################
# Make sure a directory-backed libvirt pool exists, autostarts and is active.
# Safe to call repeatedly: an existing pool is not redefined and a running
# pool is not started again.
# Arguments: $1 - pool name, $2 - target directory
# Returns:   0 on success, 1 when any virsh step fails
#######################################
_ensure_dir_pool() {
  local name=$1
  local target=$2

  mkdir -p -- "$target" || return 1
  if ! virsh pool-info "$name" >/dev/null 2>&1; then
    virsh pool-define-as --name "$name" --type dir --target "$target" || return 1
  fi
  virsh pool-autostart "$name" || return 1
  if ! virsh pool-info "$name" 2>/dev/null |
    grep -Eq '^State:[[:space:]]+running'; then
    virsh pool-start "$name" || return 1
  fi
}

#######################################
# Create the "default" (disk images) and "iso" (installation media) pools.
# Globals:   none
# Arguments: none
# Outputs:   virsh output and a result line on stdout
# Returns:   0 on success, 1 when either pool could not be set up
#######################################
configure_storage_pool() {
  echo "Configuring storage pools..."
  # Many libvirt installations ship with a "default" pool already defined,
  # which made the unconditional pool-define-as abort the whole script.
  _ensure_dir_pool default /var/lib/libvirt/images || return 1
  _ensure_dir_pool iso /var/lib/libvirt/iso || return 1
  echo "✓ Storage pools created"
}
# Run the setup stages in order: hardware check, packages, networking,
# kernel tuning, storage pools.
check_virtualization_support
install_kvm_packages
configure_network_bridge
optimize_kernel_parameters
configure_storage_pool
echo "KVM host environment initialization complete"
1.2 VM Creation and Management
#!/bin/bash
# KVM VM creation and management script
#######################################
# Create a qcow2 disk and install a new guest from an ISO with virt-install.
# Globals:   VNC_LISTEN (read, optional) - VNC listen address, default
#            0.0.0.0. NOTE(review): 0.0.0.0 exposes an unauthenticated
#            console on every host interface; consider 127.0.0.1.
# Arguments: $1 - VM name
#            $2 - memory in MB
#            $3 - number of vCPUs
#            $4 - disk size in GB
#            $5 - path to the installation ISO
# Outputs:   progress and result lines on stdout, usage errors on stderr
# Returns:   0 on success, 1 on bad arguments or when any step fails
#######################################
create_vm() {
  local vm_name=${1:-}
  local vm_memory=${2:-}
  local vm_vcpus=${3:-}
  local vm_disk_size=${4:-}
  local vm_iso_path=${5:-}
  local disk_path

  if [[ -z $vm_name || -z $vm_memory || -z $vm_vcpus ||
    -z $vm_disk_size || -z $vm_iso_path ]]; then
    echo "Usage: create_vm name memory_mb vcpus disk_gb iso_path" >&2
    return 1
  fi
  disk_path="/var/lib/libvirt/images/${vm_name}.qcow2"

  echo "Creating VM: $vm_name"
  echo "Memory: ${vm_memory}MB, CPU: ${vm_vcpus} cores, Disk: ${vm_disk_size}GB"

  # qemu-img create silently replaces an existing image; refuse instead of
  # destroying the disk of a VM that already uses this name.
  if [[ -e $disk_path ]]; then
    echo "✗ VM $vm_name creation failed: $disk_path already exists"
    return 1
  fi
  if ! qemu-img create -f qcow2 "$disk_path" "${vm_disk_size}G"; then
    echo "✗ VM $vm_name creation failed"
    return 1
  fi

  # No explicit VNC port: a fixed port=5900 made every VM after the first
  # one fail to start, so let libvirt pick a free port per guest.
  if virt-install \
    --name "$vm_name" \
    --memory "$vm_memory" \
    --vcpus "$vm_vcpus" \
    --disk "path=${disk_path},format=qcow2,bus=virtio" \
    --cdrom "$vm_iso_path" \
    --network bridge=br0,model=virtio \
    --graphics "vnc,listen=${VNC_LISTEN:-0.0.0.0}" \
    --os-type linux \
    --os-variant rhel7 \
    --boot hd,cdrom \
    --noautoconsole; then
    echo "✓ VM $vm_name created successfully"
    return 0
  fi
  echo "✗ VM $vm_name creation failed"
  return 1
}
#######################################
# Create the predefined set of web, database and application guests.
# Each entry is "name:memory_mb:vcpus:disk_gb:iso_path" and is passed to
# create_vm; a 5 second pause follows every attempt.
# Globals:   none
# Arguments: none
# Outputs:   create_vm output plus a summary line when something failed
# Returns:   0 when every VM was created, 1 when at least one failed
#######################################
batch_create_vms() {
  local -a vm_configs=(
    "web-vm1:4096:2:50:/var/lib/libvirt/iso/centos7.iso"
    "web-vm2:4096:2:50:/var/lib/libvirt/iso/centos7.iso"
    "db-vm1:8192:4:100:/var/lib/libvirt/iso/centos7.iso"
    "db-vm2:8192:4:100:/var/lib/libvirt/iso/centos7.iso"
    "app-vm1:6144:4:80:/var/lib/libvirt/iso/centos7.iso"
    "app-vm2:6144:4:80:/var/lib/libvirt/iso/centos7.iso"
  )
  # Declared local so the field variables no longer overwrite same-named
  # globals of the caller.
  local vm_config name memory vcpus disk_size iso_path
  local failed=0

  echo "Batch creating VMs..."
  for vm_config in "${vm_configs[@]}"; do
    IFS=':' read -r name memory vcpus disk_size iso_path <<<"$vm_config"
    # Keep going after a failure, but remember it for the final status.
    if ! create_vm "$name" "$memory" "$vcpus" "$disk_size" "$iso_path"; then
      failed=$((failed + 1))
    fi
    sleep 5
  done

  if ((failed > 0)); then
    echo "✗ $failed of ${#vm_configs[@]} VMs failed to create"
    return 1
  fi
}
#######################################
# Run one lifecycle action against a guest through virsh.
# Globals:   none
# Arguments: $1 - action: start|stop|force-stop|restart|status|console
#            $2 - VM name
# Outputs:   a confirmation line on stdout after a successful state change;
#            virsh's own output for status/console; usage text on bad input
# Returns:   virsh's exit status, or 1 for an unknown action / missing name
#######################################
manage_vm() {
  local action=${1:-}
  local vm_name=${2:-}
  local usage="Usage: manage_vm {start|stop|force-stop|restart|status|console} vm_name"
  local virsh_verb done_msg

  if [[ -z $vm_name ]]; then
    echo "$usage"
    return 1
  fi

  case "$action" in
    start)
      virsh_verb=start
      done_msg="Started VM: $vm_name"
      ;;
    stop)
      virsh_verb=shutdown
      done_msg="Stopped VM: $vm_name"
      ;;
    force-stop)
      virsh_verb=destroy
      done_msg="Force stopped VM: $vm_name"
      ;;
    restart)
      virsh_verb=reboot
      done_msg="Restarted VM: $vm_name"
      ;;
    status)
      virsh dominfo "$vm_name"
      return
      ;;
    console)
      virsh console "$vm_name"
      return
      ;;
    *)
      echo "$usage"
      return 1
      ;;
  esac

  # Only confirm the action when virsh actually succeeded.
  virsh "$virsh_verb" "$vm_name" || return
  echo "$done_msg"
}
#######################################
# Create, list, revert to or delete a snapshot of a guest.
# Globals:   none
# Arguments: $1 - action: create|list|revert|delete
#            $2 - VM name
#            $3 - snapshot name (required for everything except list)
# Outputs:   a confirmation line on stdout after a successful change;
#            virsh's listing for "list"; usage text on bad input
# Returns:   virsh's exit status, or 1 for bad/missing arguments
#######################################
snapshot_management() {
  local action=${1:-}
  local vm_name=${2:-}
  local snapshot_name=${3:-}
  local usage="Usage: snapshot_management {create|list|revert|delete} vm_name [snapshot_name]"

  if [[ -z $vm_name ]]; then
    echo "$usage"
    return 1
  fi
  # Every action except "list" operates on one named snapshot.
  case "$action" in
    create | revert | delete)
      if [[ -z $snapshot_name ]]; then
        echo "$usage"
        return 1
      fi
      ;;
  esac

  case "$action" in
    create)
      virsh snapshot-create-as "$vm_name" "$snapshot_name" \
        --description "Snapshot created on $(date)" || return
      echo "Created snapshot $snapshot_name for $vm_name"
      ;;
    list)
      virsh snapshot-list "$vm_name"
      ;;
    revert)
      virsh snapshot-revert "$vm_name" "$snapshot_name" || return
      echo "Reverted $vm_name to snapshot $snapshot_name"
      ;;
    delete)
      virsh snapshot-delete "$vm_name" "$snapshot_name" || return
      echo "Deleted snapshot $snapshot_name from $vm_name"
      ;;
    *)
      echo "$usage"
      return 1
      ;;
  esac
}
#######################################
# Clone a guest with virt-clone after checking that the source is not active.
# Globals:   none
# Arguments: $1 - source VM name
#            $2 - name for the clone
# Outputs:   progress and result lines on stdout, usage errors on stderr
# Returns:   0 on success; 1 when arguments are missing, the source cannot be
#            queried, the source is not shut off/paused, or virt-clone fails
#######################################
clone_vm() {
  local source_vm=${1:-}
  local target_vm=${2:-}
  local state

  if [[ -z $source_vm || -z $target_vm ]]; then
    echo "Usage: clone_vm source_vm target_vm" >&2
    return 1
  fi

  # domstate prints only the state, so a VM whose *name* contains "running"
  # is no longer misdetected, and an unknown VM is reported as an error.
  if ! state=$(virsh domstate "$source_vm" 2>/dev/null); then
    echo "✗ VM clone failed: cannot query state of $source_vm"
    return 1
  fi
  case "$state" in
    "shut off" | paused) ;;
    *)
      echo "Source VM is $state, please shut it down first"
      return 1
      ;;
  esac

  if virt-clone --original "$source_vm" --name "$target_vm" --auto-clone; then
    echo "✓ VM cloned successfully"
  else
    echo "✗ VM clone failed"
    return 1
  fi
}
# Command-line entry point: the first argument selects the operation, the
# remaining arguments are forwarded (quoted, so values with spaces survive).
case "${1:-}" in
  create) create_vm "${@:2:5}" ;;
  batch-create) batch_create_vms ;;
  manage) manage_vm "${@:2:2}" ;;
  snapshot) snapshot_management "${@:2:3}" ;;
  clone) clone_vm "${@:2:2}" ;;
  *)
    echo "Usage: $0 {create|batch-create|manage|snapshot|clone} [parameters...]"
    exit 1
    ;;
esac
2. Advanced Network Configuration and Management
2.1 Network Virtualization Configuration
KVM supports multiple network modes such as NAT, bridge, and isolated networks. Production deployments often require VLANs, bonding, and Open vSwitch for complex scenarios.
#!/usr/bin/env python3
# KVM network management script
import subprocess, xml.etree.ElementTree as ET, json, sys
class KVMNetworkManager:
    """Create libvirt networks and tune guest interfaces by calling virsh.

    Every ``create_*``/``attach_*``/``configure_*`` method prints a status
    line and returns True on success or False on failure.
    ``get_network_info`` returns text.  XML handed to virsh is built with
    ElementTree (so names are escaped) and written to a private temporary
    file that is removed once virsh has returned.
    """

    # Placeholder in an argument list that _virsh_with_xml replaces with the
    # path of the temporary XML file.
    _XML_PATH = object()

    # Linux interface names are limited to 15 characters.
    _MAX_IFNAME_LEN = 15

    def __init__(self):
        self.virsh_cmd = "virsh"

    # ------------------------------------------------------------------
    # internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _run(argv):
        """Run ``argv`` and return the CompletedProcess with text output."""
        return subprocess.run(argv, capture_output=True, text=True)

    def _virsh_with_xml(self, xml_text, *args):
        """Run virsh with ``xml_text`` stored in a temporary file.

        Any occurrence of ``self._XML_PATH`` in ``args`` is replaced by the
        file's path.  The file is deleted afterwards, whatever happens.
        """
        import os
        import tempfile

        fd, path = tempfile.mkstemp(prefix='kvm-net-', suffix='.xml')
        try:
            with os.fdopen(fd, 'w') as handle:
                handle.write(xml_text)
            argv = [self.virsh_cmd]
            argv.extend(path if arg is self._XML_PATH else arg for arg in args)
            return self._run(argv)
        finally:
            os.unlink(path)

    @staticmethod
    def _network_element(network_name, forward_mode, bridge_attrs):
        """Build the <network> element shared by all network kinds."""
        root = ET.Element('network')
        ET.SubElement(root, 'name').text = network_name
        ET.SubElement(root, 'forward', {'mode': forward_mode})
        ET.SubElement(root, 'bridge', bridge_attrs)
        return root

    def _define_and_start(self, network_name, root, label):
        """Define, start and autostart a network; ``label`` names its kind."""
        xml_text = ET.tostring(root, encoding='unicode')
        result = self._virsh_with_xml(xml_text, 'net-define', self._XML_PATH)
        if result.returncode != 0:
            print(f"✗ {label} network {network_name} creation failed: {result.stderr}")
            return False
        for action in ('net-start', 'net-autostart'):
            result = self._run([self.virsh_cmd, action, network_name])
            if result.returncode != 0:
                print(f"✗ {label} network {network_name} creation failed: {result.stderr}")
                return False
        print(f"✓ {label} network {network_name} created successfully")
        return True

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def create_bridge_network(self, network_name, bridge_name, subnet):
        """Expose the existing host bridge ``bridge_name`` as a libvirt network.

        ``subnet`` is accepted for backward compatibility and is not used:
        the generated bridge-mode definition carries no IP configuration.
        """
        print(f"Creating bridge network: {network_name}")
        root = self._network_element(network_name, 'bridge', {'name': bridge_name})
        return self._define_and_start(network_name, root, "Bridge")

    def create_nat_network(self, network_name, subnet, dhcp_start, dhcp_end):
        """Create a NAT network with DHCP for the IPv4 ``subnet``.

        ``subnet`` is "address/prefix"; a bare address is treated as /24 as
        before.  When the address part is the network address itself (e.g.
        "192.168.100.0/24") the first usable host address becomes the
        gateway, because the network address cannot be assigned to the
        bridge.  The netmask is derived from the prefix instead of being
        fixed at 255.255.255.0.
        """
        import ipaddress

        print(f"Creating NAT network: {network_name}")
        try:
            spec = subnet if '/' in subnet else f"{subnet}/24"
            iface = ipaddress.ip_interface(spec)
        except ValueError as exc:
            print(f"✗ NAT network {network_name} creation failed: invalid subnet {subnet!r}: {exc}")
            return False
        if iface.version != 4:
            print(f"✗ NAT network {network_name} creation failed: only IPv4 subnets are supported")
            return False

        network = iface.network
        gateway = iface.ip
        if gateway == network.network_address and network.num_addresses > 2:
            gateway = network.network_address + 1

        bridge_attrs = {'stp': 'on', 'delay': '0'}
        bridge_name = f"virbr-{network_name}"
        # A name longer than the kernel limit cannot be created; omit it in
        # that case so libvirt assigns a bridge name itself.
        if len(bridge_name) <= self._MAX_IFNAME_LEN:
            bridge_attrs = {'name': bridge_name, 'stp': 'on', 'delay': '0'}

        root = self._network_element(network_name, 'nat', bridge_attrs)
        ip_el = ET.SubElement(root, 'ip', {'address': str(gateway), 'netmask': str(network.netmask)})
        dhcp_el = ET.SubElement(ip_el, 'dhcp')
        ET.SubElement(dhcp_el, 'range', {'start': dhcp_start, 'end': dhcp_end})
        return self._define_and_start(network_name, root, "NAT")

    def create_vlan_network(self, network_name, vlan_id, physical_interface):
        """Create a VLAN sub-interface, bridge it and publish it to libvirt.

        Host devices that already exist are reused, so the call can be
        repeated.  Any failing ``ip``/``brctl`` step aborts with False
        instead of being ignored.
        """
        import os

        print(f"Creating VLAN network: {network_name}, VLAN ID: {vlan_id}")
        try:
            vlan_interface = f"{physical_interface}.{vlan_id}"
            bridge_name = f"br-vlan{vlan_id}"
            sys_net = '/sys/class/net'

            steps = []
            if not os.path.exists(f"{sys_net}/{vlan_interface}"):
                steps.append(['ip', 'link', 'add', 'link', physical_interface, 'name', vlan_interface,
                              'type', 'vlan', 'id', str(vlan_id)])
            steps.append(['ip', 'link', 'set', vlan_interface, 'up'])
            if not os.path.exists(f"{sys_net}/{bridge_name}"):
                steps.append(['brctl', 'addbr', bridge_name])
            if not os.path.exists(f"{sys_net}/{bridge_name}/brif/{vlan_interface}"):
                steps.append(['brctl', 'addif', bridge_name, vlan_interface])
            steps.append(['ip', 'link', 'set', bridge_name, 'up'])

            for argv in steps:
                result = self._run(argv)
                if result.returncode != 0:
                    print(f"✗ VLAN network {network_name} creation failed: "
                          f"{' '.join(argv)}: {result.stderr}")
                    return False

            root = self._network_element(network_name, 'bridge', {'name': bridge_name})
            return self._define_and_start(network_name, root, "VLAN")
        except Exception as e:
            print(f"VLAN network creation exception: {e}")
            return False

    def attach_interface_to_vm(self, vm_name, network_name, mac_address=None):
        """Attach a persistent virtio NIC on ``network_name`` to ``vm_name``.

        ``mac_address`` is optional; without it no <mac> element is sent.
        """
        print(f"Attaching network interface to VM {vm_name}: {network_name}")
        try:
            interface = ET.Element('interface', {'type': 'network'})
            ET.SubElement(interface, 'source', {'network': network_name})
            ET.SubElement(interface, 'model', {'type': 'virtio'})
            if mac_address:
                ET.SubElement(interface, 'mac', {'address': mac_address})

            result = self._virsh_with_xml(
                ET.tostring(interface, encoding='unicode'),
                'attach-device', vm_name, self._XML_PATH, '--persistent')
            if result.returncode == 0:
                print("✓ Network interface added successfully")
                return True
            print(f"✗ Network interface addition failed: {result.stderr}")
            return False
        except Exception as e:
            print(f"Add interface exception: {e}")
            return False

    def get_network_info(self, network_name=None):
        """Return ``virsh net-info`` for one network, or the list of all.

        On failure a descriptive error string is returned instead.
        """
        try:
            if network_name:
                result = self._run([self.virsh_cmd, 'net-info', network_name])
            else:
                result = self._run([self.virsh_cmd, 'net-list', '--all'])
            if result.returncode == 0:
                return result.stdout
            return f"Failed to get network info: {result.stderr}"
        except Exception as e:
            return f"Network info exception: {e}"

    def configure_network_qos(self, vm_name, interface_name, inbound_avg, outbound_avg):
        """Set average inbound/outbound bandwidth on one guest interface.

        The interface is located by its ``<target dev=...>`` name in the
        domain XML.  An existing <bandwidth> element is replaced rather than
        duplicated.  Returns False, without redefining the domain, when no
        interface carries that target name.  The two averages are written
        into the XML unchanged.
        """
        print(f"Configuring QoS for VM {vm_name} interface {interface_name}")
        try:
            result = self._run([self.virsh_cmd, 'dumpxml', vm_name])
            if result.returncode != 0:
                print(f"Failed to get VM config: {result.stderr}")
                return False

            root = ET.fromstring(result.stdout)
            matched = None
            for interface in root.findall('.//interface'):
                # Interfaces without a <target> child are skipped.
                target = interface.find('target')
                if target is not None and target.get('dev') == interface_name:
                    matched = interface
                    break
            if matched is None:
                print(f"✗ Network QoS configuration failed: "
                      f"interface {interface_name} not found on {vm_name}")
                return False

            for stale in matched.findall('bandwidth'):
                matched.remove(stale)
            bandwidth = ET.SubElement(matched, 'bandwidth')
            ET.SubElement(bandwidth, 'inbound', {'average': str(inbound_avg)})
            ET.SubElement(bandwidth, 'outbound', {'average': str(outbound_avg)})

            result = self._virsh_with_xml(
                ET.tostring(root, encoding='unicode'), 'define', self._XML_PATH)
            if result.returncode == 0:
                print("✓ Network QoS configuration successful")
                return True
            print(f"✗ Network QoS configuration failed: {result.stderr}")
            return False
        except Exception as e:
            print(f"QoS configuration exception: {e}")
            return False
if __name__ == "__main__":
manager = KVMNetworkManager()
manager.create_bridge_network("production-bridge", "br0", "192.168.1.0/24")
manager.create_nat_network("development-nat", "192.168.100.0/24", "192.168.100.100", "192.168.100.200")
manager.create_vlan_network("vlan-100", 100, "eth0")
manager.attach_interface_to_vm("web-vm1", "production-bridge", "52:54:00:12:34:56")
manager.configure_network_qos("web-vm1", "vnet0", 1000, 1000)
print(manager.get_network_info())3. Storage Management and Optimization
3.2 Storage Pool Management
KVM supports various storage backends such as local LVM, NFS, iSCSI, and Ceph. Choose the appropriate solution based on performance, reliability, and cost, and configure the pools accordingly.
#!/bin/bash
# KVM storage management script
#######################################
# Define, autostart and start a libvirt pool backed by an LVM volume group.
# Globals:   none
# Arguments: $1 - pool name
#            $2 - existing volume group name
# Outputs:   progress and result lines on stdout, usage errors on stderr
# Returns:   0 on success; 1 when arguments are missing, the volume group
#            does not exist or any virsh step fails
#######################################
create_lvm_pool() {
  local pool_name=${1:-}
  local vg_name=${2:-}

  if [[ -z $pool_name || -z $vg_name ]]; then
    echo "Usage: create_lvm_pool pool_name vg_name" >&2
    return 1
  fi

  echo "Creating LVM pool: $pool_name"
  if ! vgdisplay "$vg_name" >/dev/null 2>&1; then
    echo "Error: Volume group $vg_name does not exist"
    return 1
  fi

  # Success is reported only when definition, autostart and start all work.
  if virsh pool-define-as --name "$pool_name" --type logical \
    --source-name "$vg_name" --target "/dev/$vg_name" &&
    virsh pool-autostart "$pool_name" &&
    virsh pool-start "$pool_name"; then
    echo "✓ LVM pool $pool_name created successfully"
  else
    echo "✗ LVM pool $pool_name creation failed"
    return 1
  fi
}
#######################################
# Define, autostart and start a libvirt pool backed by an NFS export.
# Globals:   none
# Arguments: $1 - pool name
#            $2 - NFS server host
#            $3 - exported path on the server
#            $4 - local mount point (created when missing)
# Outputs:   progress and result lines on stdout, usage errors on stderr
# Returns:   0 on success; 1 when arguments are missing or any step fails
#######################################
create_nfs_pool() {
  local pool_name=${1:-}
  local nfs_server=${2:-}
  local nfs_path=${3:-}
  local mount_point=${4:-}

  if [[ -z $pool_name || -z $nfs_server || -z $nfs_path || -z $mount_point ]]; then
    echo "Usage: create_nfs_pool pool_name nfs_server nfs_path mount_point" >&2
    return 1
  fi

  echo "Creating NFS pool: $pool_name"
  if ! mkdir -p -- "$mount_point"; then
    echo "✗ NFS pool $pool_name creation failed"
    return 1
  fi

  if virsh pool-define-as --name "$pool_name" --type netfs \
    --source-host "$nfs_server" --source-path "$nfs_path" \
    --target "$mount_point" &&
    virsh pool-autostart "$pool_name" &&
    virsh pool-start "$pool_name"; then
    echo "✓ NFS pool $pool_name created successfully"
  else
    echo "✗ NFS pool $pool_name creation failed"
    return 1
  fi
}
#######################################
# Define, autostart and start a libvirt pool backed by an iSCSI target.
# Globals:   none
# Arguments: $1 - pool name
#            $2 - iSCSI portal (host or host:port)
#            $3 - target IQN
# Outputs:   progress and result lines on stdout, usage errors on stderr
# Returns:   0 on success; 1 when arguments are missing, discovery fails or
#            any virsh step fails
#######################################
create_iscsi_pool() {
  local pool_name=${1:-}
  local iscsi_target=${2:-}
  local iscsi_iqn=${3:-}

  if [[ -z $pool_name || -z $iscsi_target || -z $iscsi_iqn ]]; then
    echo "Usage: create_iscsi_pool pool_name portal target_iqn" >&2
    return 1
  fi

  echo "Creating iSCSI pool: $pool_name"
  if ! iscsiadm -m discovery -t st -p "$iscsi_target"; then
    echo "✗ iSCSI pool $pool_name creation failed"
    return 1
  fi
  # Best effort: iscsiadm also exits non-zero when a session already exists,
  # so a failed login alone does not abort the pool creation.
  if ! iscsiadm -m node -T "$iscsi_iqn" -p "$iscsi_target" --login; then
    echo "Warning: iSCSI login to $iscsi_iqn reported an error, continuing" >&2
  fi

  # An iscsi pool needs a target path under which its LUNs appear;
  # /dev/disk/by-path gives stable device names.
  if virsh pool-define-as --name "$pool_name" --type iscsi \
    --source-host "$iscsi_target" --source-dev "$iscsi_iqn" \
    --target /dev/disk/by-path &&
    virsh pool-autostart "$pool_name" &&
    virsh pool-start "$pool_name"; then
    echo "✓ iSCSI pool $pool_name created successfully"
  else
    echo "✗ iSCSI pool $pool_name creation failed"
    return 1
  fi
}
#######################################
# Define, autostart and start a libvirt RBD pool on a Ceph cluster.
# The pool XML is written to a private temporary file that is removed before
# the function returns.
# Globals:   CEPH_MONITORS (read, optional) - space-separated monitor hosts;
#            defaults to "192.168.2.10 192.168.2.11 192.168.2.12"
# Arguments: $1 - libvirt pool name
#            $2 - Ceph pool name
#            $3 - Ceph user name
#            $4 - libvirt secret usage name (optional, default
#                 "client.admin secret")
# Outputs:   progress and result lines on stdout, usage errors on stderr
# Returns:   0 on success; 1 when arguments are missing or any step fails
#######################################
create_ceph_pool() {
  local pool_name=${1:-}
  local ceph_pool=${2:-}
  local ceph_user=${3:-}
  local secret_usage=${4:-client.admin secret}
  local -a monitors
  local xml_file mon rc=0

  if [[ -z $pool_name || -z $ceph_pool || -z $ceph_user ]]; then
    echo "Usage: create_ceph_pool pool_name ceph_pool ceph_user [secret_usage]" >&2
    return 1
  fi
  read -ra monitors <<<"${CEPH_MONITORS:-192.168.2.10 192.168.2.11 192.168.2.12}"

  echo "Creating Ceph pool: $pool_name"
  xml_file=$(mktemp "${TMPDIR:-/tmp}/ceph-pool.XXXXXX") || return 1
  {
    printf "<pool type='rbd'>\n"
    printf "<name>%s</name>\n" "$pool_name"
    printf "<source>\n"
    printf "<name>%s</name>\n" "$ceph_pool"
    for mon in "${monitors[@]}"; do
      printf "<host name='%s' port='6789'/>\n" "$mon"
    done
    printf "<auth username='%s' type='ceph'>\n" "$ceph_user"
    printf "<secret type='ceph' usage='%s'/>\n" "$secret_usage"
    printf "</auth>\n"
    printf "</source>\n"
    printf "</pool>\n"
  } >"$xml_file"

  if virsh pool-define "$xml_file" &&
    virsh pool-autostart "$pool_name" &&
    virsh pool-start "$pool_name"; then
    echo "✓ Ceph pool $pool_name created successfully"
  else
    echo "✗ Ceph pool $pool_name creation failed"
    rc=1
  fi

  rm -f -- "$xml_file"
  return "$rc"
}
#######################################
# Create a volume in a storage pool and print its details.
# Globals:   none
# Arguments: $1 - pool name
#            $2 - volume name
#            $3 - size in GB (the "G" suffix is added here)
#            $4 - volume format (optional, default qcow2)
# Outputs:   progress, result and `virsh vol-info` output on stdout
# Returns:   0 on success; 1 when arguments are missing or creation fails
#######################################
create_storage_volume() {
  local pool_name=${1:-}
  local volume_name=${2:-}
  local volume_size=${3:-}
  local format=${4:-qcow2}

  if [[ -z $pool_name || -z $volume_name || -z $volume_size ]]; then
    echo "Usage: create_storage_volume pool_name volume_name size_gb [format]" >&2
    return 1
  fi

  echo "Creating storage volume: $volume_name ($volume_size GB)"
  if virsh vol-create-as "$pool_name" "$volume_name" "${volume_size}G" \
    --format "$format"; then
    echo "✓ Volume $volume_name created successfully"
    virsh vol-info "$volume_name" --pool "$pool_name"
  else
    echo "✗ Volume $volume_name creation failed"
    return 1
  fi
}
#######################################
# Retune the disk driver of every qcow2-backed disk in a guest's persistent
# definition to cache='none', io='native', discard='unmap'.
#
# Fixes relative to the previous version:
#   * the existing <driver> line is edited in place; a second <driver> is no
#     longer appended to every disk (including CD-ROM/raw ones)
#   * attribute values are matched in the single-quoted form used in the
#     XML that virsh dumpxml prints, so the edits actually apply
#   * cache is set to "none" because native AIO requires direct I/O; the
#     old writeback + native pairing is not a usable combination
# Globals:   none
# Arguments: $1 - VM name
# Outputs:   progress and result lines on stdout, usage errors on stderr
# Returns:   0 on success; 1 when the name is missing or a virsh step fails
#######################################
optimize_storage_performance() {
  local vm_name=${1:-}
  local driver_match="<driver name='qemu' type='qcow2'"
  local xml_file rc=0

  if [[ -z $vm_name ]]; then
    echo "Usage: optimize_storage_performance vm_name" >&2
    return 1
  fi

  echo "Optimizing storage performance for VM $vm_name"
  xml_file=$(mktemp "${TMPDIR:-/tmp}/kvm-storage.XXXXXX") || return 1

  # --inactive: edit the persistent definition, not the live XML of a
  # running guest.
  if ! virsh dumpxml --inactive "$vm_name" >"$xml_file"; then
    echo "✗ Storage performance optimization failed"
    rm -f -- "$xml_file"
    return 1
  fi

  # Step 1 drops any previous cache/io/discard attributes from qcow2 driver
  # lines; step 2 appends the tuned values before the closing "/>".
  sed -E -i \
    -e "/${driver_match}/ s/ (cache|io|discard)='[^']*'//g" \
    -e "/${driver_match}/ s|/>| cache='none' io='native' discard='unmap'/>|" \
    "$xml_file"

  if virsh define "$xml_file"; then
    echo "✓ Storage performance optimization completed"
  else
    echo "✗ Storage performance optimization failed"
    rc=1
  fi

  rm -f -- "$xml_file"
  return "$rc"
}
#######################################
# Print an overview of all storage pools: the pool list, `virsh pool-info`
# for each active pool, and the volume list of each active pool.
# Globals:   none
# Arguments: none
# Outputs:   the report on stdout
#######################################
monitor_storage_usage() {
  printf '%s\n' "=== Storage Pool Usage ==="
  virsh pool-list --all

  # Section 1: per-pool details, separated by blank lines
  printf '\n%s\n' "=== Detailed Pool Info ==="
  for pool in $(virsh pool-list --name); do
    printf '%s\n' "--- Pool: $pool ---"
    virsh pool-info "$pool"
    printf '\n'
  done

  # Section 2: volumes per pool
  printf '%s\n' "=== Volume Usage ==="
  for pool in $(virsh pool-list --name); do
    printf '%s\n' "--- Volumes in $pool ---"
    virsh vol-list "$pool"
    printf '\n'
  done
}
#######################################
# Copy every file-backed disk of a guest, plus its XML definition, into a
# backup directory. Copies are named "<disk file>.backup.<YYYYmmdd_HHMMSS>".
# NOTE(review): disks are copied with plain cp; for a running guest the copy
# may be inconsistent - confirm callers stop or snapshot the VM first.
# Globals:   none
# Arguments: $1 - VM name
#            $2 - backup directory (created when missing)
# Outputs:   progress and result lines on stdout, usage errors on stderr
# Returns:   0 when everything was saved; 1 on missing arguments or when the
#            disk list, a copy or the XML dump fails
#######################################
backup_storage() {
  local vm_name=${1:-}
  local backup_path=${2:-}
  local listing stamp target source
  local failed=0

  if [[ -z $vm_name || -z $backup_path ]]; then
    echo "Usage: backup_storage vm_name backup_path" >&2
    return 1
  fi

  echo "Backing up storage for VM $vm_name"
  mkdir -p -- "$backup_path" || return 1
  if ! listing=$(virsh domblklist "$vm_name"); then
    echo "✗ Storage backup failed: cannot list disks of $vm_name"
    return 1
  fi
  stamp=$(date +%Y%m%d_%H%M%S)

  # `read` puts the first column in $target and the rest of the line in
  # $source, so image paths containing spaces stay intact. Header, separator
  # and "-" (no medium) rows fail the -f test and are skipped.
  while read -r target source; do
    [[ -n $source && -f $source ]] || continue
    echo "Backing up disk: $source"
    if ! cp -- "$source" "$backup_path/${source##*/}.backup.$stamp"; then
      echo "✗ Failed to back up disk: $source"
      failed=1
    fi
  done <<<"$listing"

  if ! virsh dumpxml "$vm_name" >"$backup_path/${vm_name}_config.xml"; then
    failed=1
  fi

  if ((failed)); then
    echo "✗ Storage backup failed"
    return 1
  fi
  echo "✓ Storage backup completed"
}
# Command-line entry point: the first argument selects the operation, the
# remaining arguments are forwarded (quoted, so values with spaces survive).
case "${1:-}" in
  create-lvm) create_lvm_pool "${@:2:2}" ;;
  create-nfs) create_nfs_pool "${@:2:4}" ;;
  create-iscsi) create_iscsi_pool "${@:2:3}" ;;
  create-ceph) create_ceph_pool "${@:2:3}" ;;
  create-volume) create_storage_volume "${@:2:4}" ;;
  optimize) optimize_storage_performance "${@:2:1}" ;;
  monitor) monitor_storage_usage ;;
  backup) backup_storage "${@:2:2}" ;;
  *)
    echo "Usage: $0 {create-lvm|create-nfs|create-iscsi|create-ceph|create-volume|optimize|monitor|backup} [parameters...]"
    exit 1
    ;;
esac
4. Monitoring and Automated Operations
4.1 Monitoring System Configuration
A full monitoring system should collect host CPU, memory, disk, and network metrics, as well as VM-specific statistics, and trigger alerts when thresholds are exceeded.
#!/usr/bin/env python3
# KVM monitoring and automation script
import subprocess, psutil, time, json, smtplib, logging
from email.mime.text import MIMEText
from datetime import datetime
class KVMMonitor:
    """Poll host and guest health, e-mail alerts and restart stopped guests.

    Host metrics come from psutil, guest data from virsh.  Alerts are sent
    through the SMTP server named by ``smtp_host``.
    """

    # NOTE(review): the addresses in the source text were unusable residue
    # ("[email protected]"); these local placeholders must be replaced with
    # the real sender/recipient, either here or per instance.
    alert_sender = 'kvm-monitor@localhost'
    alert_recipient = 'root@localhost'
    smtp_host = 'localhost'

    def __init__(self):
        self.setup_logging()
        # Percentages above which an alert is raised.
        self.alert_threshold = {
            'cpu_usage': 80,
            'memory_usage': 85,
            'disk_usage': 90,
            'network_error_rate': 5
        }

    def setup_logging(self):
        """Log to /var/log/kvm_monitor.log and to the console."""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[logging.FileHandler('/var/log/kvm_monitor.log'), logging.StreamHandler()])
        self.logger = logging.getLogger(__name__)

    def get_host_metrics(self):
        """Return a dict of host CPU, memory, disk, network and load figures.

        Mount points that cannot be read are skipped.  Returns None (after
        logging the error) when collection fails.
        """
        try:
            metrics = {
                'timestamp': datetime.now().isoformat(),
                'cpu_usage': psutil.cpu_percent(interval=1),
                'memory': psutil.virtual_memory()._asdict(),
                'disk': {},
                'network': psutil.net_io_counters()._asdict(),
                'load_average': psutil.getloadavg()
            }
            for part in psutil.disk_partitions():
                try:
                    usage = psutil.disk_usage(part.mountpoint)
                except PermissionError:
                    continue
                metrics['disk'][part.mountpoint] = {
                    'total': usage.total, 'used': usage.used,
                    'free': usage.free, 'percent': usage.percent}
            return metrics
        except Exception as e:
            self.logger.error(f"Failed to get host metrics: {e}")
            return None

    def get_vm_metrics(self, vm_name):
        """Return state and sizing of one guest as reported by virsh.

        The result always has ``vm_name``, ``timestamp``, ``state``,
        ``cpu_time``, ``memory`` and ``vcpus``; ``disk_io`` / ``network_io``
        hold raw virsh output and are present only when the corresponding
        virsh call succeeded.  Returns None when ``virsh dominfo`` fails.
        """
        try:
            result = subprocess.run(['virsh', 'dominfo', vm_name], capture_output=True, text=True)
            if result.returncode != 0:
                return None

            # dominfo prints "Key:   value" lines
            vm_info = {}
            for line in result.stdout.splitlines():
                key, sep, value = line.partition(':')
                if sep:
                    vm_info[key.strip()] = value.strip()

            metrics = {
                'vm_name': vm_name,
                'timestamp': datetime.now().isoformat(),
                'state': vm_info.get('State', 'unknown'),
                'cpu_time': vm_info.get('CPU time', '0'),
                'memory': vm_info.get('Max memory', '0'),
                'vcpus': vm_info.get('CPU(s)', '0')
            }
            for key, subcommand in (('disk_io', 'domblkstat'), ('network_io', 'domifstat')):
                extra = subprocess.run(['virsh', subcommand, vm_name], capture_output=True, text=True)
                if extra.returncode == 0:
                    metrics[key] = extra.stdout
            return metrics
        except Exception as e:
            self.logger.error(f"Failed to get VM {vm_name} metrics: {e}")
            return None

    def check_alerts(self, metrics):
        """Return a list of alert messages for values above the thresholds."""
        alerts = []
        if metrics.get('cpu_usage', 0) > self.alert_threshold['cpu_usage']:
            alerts.append(f"CPU usage high: {metrics['cpu_usage']:.2f}%")
        mem_percent = metrics.get('memory', {}).get('percent', 0)
        if mem_percent > self.alert_threshold['memory_usage']:
            alerts.append(f"Memory usage high: {mem_percent:.2f}%")
        for mp, info in metrics.get('disk', {}).items():
            if info.get('percent', 0) > self.alert_threshold['disk_usage']:
                alerts.append(f"Disk {mp} usage high: {info['percent']:.2f}%")
        return alerts

    def send_alert(self, alerts):
        """E-mail the alerts, one per line; does nothing for an empty list.

        Delivery errors are logged, never raised.
        """
        if not alerts:
            return
        try:
            msg = MIMEText('\n'.join(alerts))
            msg['Subject'] = 'KVM Monitoring Alert'
            msg['From'] = self.alert_sender
            msg['To'] = self.alert_recipient
            with smtplib.SMTP(self.smtp_host) as server:
                server.send_message(msg)
            self.logger.info(f"Alert email sent: {len(alerts)} alerts")
        except Exception as e:
            self.logger.error(f"Failed to send alert email: {e}")

    def auto_remediation(self, vm_name, issue_type):
        """Run the fix for ``issue_type`` ('vm_down', 'high_memory', 'disk_full').

        Returns True when the virsh command succeeded, False otherwise; the
        success message is logged only after a zero exit status.
        """
        self.logger.info(f"Running auto remediation for {vm_name} - {issue_type}")
        actions = {
            'vm_down': (['virsh', 'start', vm_name],
                        f"VM {vm_name} restarted"),
            'high_memory': (['virsh', 'setmem', vm_name, '8G', '--live'],
                            f"Increased memory for VM {vm_name}"),
            'disk_full': (['virsh', 'blockresize', vm_name,
                           f"/var/lib/libvirt/images/{vm_name}.qcow2", '100G'],
                          f"Extended disk for VM {vm_name}"),
        }
        if issue_type not in actions:
            self.logger.warning(f"No remediation defined for issue type {issue_type}")
            return False
        command, success_message = actions[issue_type]
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except Exception as e:
            self.logger.error(f"Auto remediation failed: {e}")
            return False
        if result.returncode != 0:
            self.logger.error(f"Auto remediation failed: {result.stderr}")
            return False
        self.logger.info(success_message)
        return True

    def _run_cycle(self):
        """One pass: host alerts, then restart every guest that is shut off."""
        host_metrics = self.get_host_metrics()
        if host_metrics:
            alerts = self.check_alerts(host_metrics)
            if alerts:
                self.send_alert(alerts)

        result = subprocess.run(['virsh', 'list', '--all', '--name'], capture_output=True, text=True)
        if result.returncode == 0:
            for vm in result.stdout.splitlines():
                vm = vm.strip()
                if not vm:
                    continue
                vm_metrics = self.get_vm_metrics(vm)
                if vm_metrics and vm_metrics.get('state') == 'shut off':
                    self.auto_remediation(vm, 'vm_down')
        self.logger.info("Monitoring cycle completed")

    def monitor_loop(self, interval=60, max_cycles=None):
        """Run monitoring cycles ``interval`` seconds apart.

        With the defaults this loops forever, as before.  ``max_cycles``
        bounds the number of passes, e.g. for a one-shot run from cron.
        Errors inside a cycle are logged and do not stop the loop.
        """
        completed = 0
        while max_cycles is None or completed < max_cycles:
            try:
                self._run_cycle()
            except Exception as e:
                self.logger.error(f"Monitoring loop error: {e}")
            completed += 1
            if max_cycles is not None and completed >= max_cycles:
                break
            time.sleep(interval)
if __name__ == "__main__":
monitor = KVMMonitor()
monitor.monitor_loop()4.2 Automated Operations Tools
#!/bin/bash
# KVM automation tool
# Directory assigned for this tool; none of the functions visible in this
# script read it.
SCRIPT_DIR="/opt/kvm-automation"
# File to which every task appends its timestamped progress lines.
LOG_FILE="/var/log/kvm_automation.log"
#######################################
# Start every guest that is shut off and log guests whose current memory
# allocation ("actual" in `virsh dommemstat`) exceeds 8388608 KiB.
# Globals:   LOG_FILE (read) - file the log lines are appended to
# Arguments: none
# Outputs:   log lines appended to LOG_FILE; virsh output on stdout/stderr
#######################################
health_check() {
  local -r mem_limit_kib=8388608
  local vm state mem_kib

  echo "$(date): Running VM health check" >>"$LOG_FILE"
  while IFS= read -r vm; do
    [[ -n $vm ]] || continue

    # domstate prints the full state ("shut off"), no column parsing needed.
    state=$(virsh domstate "$vm" 2>/dev/null)
    if [[ $state == "shut off" ]]; then
      echo "$(date): VM $vm is shut down, attempting start" >>"$LOG_FILE"
      if ! virsh start "$vm"; then
        echo "$(date): VM $vm failed to start" >>"$LOG_FILE"
        continue
      fi
    fi

    # dommemstat yields nothing for a guest that is not running; the numeric
    # guard prevents the "unary operator expected" error the old test raised.
    mem_kib=$(virsh dommemstat "$vm" 2>/dev/null | awk '$1 == "actual" {print $2}')
    if [[ $mem_kib =~ ^[0-9]+$ ]] && ((mem_kib > mem_limit_kib)); then
      echo "$(date): VM $vm memory usage high" >>"$LOG_FILE"
    fi
  done < <(virsh list --all --name)
}
#######################################
# Dump the XML definition of every guest into a dated directory and remove
# dated backup directories older than 7 days.
# Globals:   LOG_FILE (read)        - file the log lines are appended to
#            KVM_BACKUP_ROOT (read, optional) - backup root directory,
#                                     default /backup/kvm
#            BACKUP_DIR (written)   - today's backup directory, left global
#                                     as in the previous version
# Arguments: none
# Outputs:   log lines appended to LOG_FILE
# Returns:   0 on success; 1 when the directory cannot be created or a
#            definition cannot be dumped
#######################################
auto_backup() {
  local backup_root=${KVM_BACKUP_ROOT:-/backup/kvm}
  local vm
  local failed=0

  echo "$(date): Performing automatic backup" >>"$LOG_FILE"
  BACKUP_DIR="$backup_root/$(date +%Y%m%d)"
  mkdir -p -- "$BACKUP_DIR" || return 1

  while IFS= read -r vm; do
    [[ -n $vm ]] || continue
    if ! virsh dumpxml "$vm" >"$BACKUP_DIR/${vm}_config.xml"; then
      echo "$(date): Failed to back up definition of VM $vm" >>"$LOG_FILE"
      failed=1
    fi
  done < <(virsh list --all --name)

  # Only the dated directories directly below the root are candidates:
  # -mindepth 1 protects the root itself and -maxdepth 1 stops find from
  # descending into a directory it has just removed.
  find "$backup_root" -mindepth 1 -maxdepth 1 -type d -mtime +7 \
    -exec rm -rf -- {} +

  return "$failed"
}
#######################################
# Apply the fixed tuning profile to every running guest in two passes:
# first pin vCPU 0 to host CPUs 0-1 and vCPU 1 to host CPUs 2-3, then set
# the live memory allocation to 4G.
# Globals:   LOG_FILE (read) - file the log line is appended to
# Arguments: none
# Outputs:   one log line appended to LOG_FILE; virsh output on stdout
#######################################
performance_optimization() {
  printf '%s\n' "$(date): Running performance optimization" >>"$LOG_FILE"

  # Pass 1: CPU pinning
  for vm in $(virsh list --name); do
    [ -z "$vm" ] && continue
    virsh vcpupin "$vm" --vcpu 0 --cpulist 0-1
    virsh vcpupin "$vm" --vcpu 1 --cpulist 2-3
  done

  # Pass 2: live memory size
  for vm in $(virsh list --name); do
    [ -z "$vm" ] && continue
    virsh setmem "$vm" 4G --live
  done
}
#######################################
# Housekeeping: resize *.log files under /var/log that are larger than 100M
# to 50M with truncate, and delete *.xml files under /tmp whose modification
# time matches -mtime +1.
# Globals:   LOG_FILE (read) - file the log line is appended to
# Arguments: none
# Outputs:   one log line appended to LOG_FILE
#######################################
cleanup() {
  # The two find invocations, spelled out as argument vectors.
  local -a oversized_logs=(/var/log -name "*.log" -size +100M -exec truncate -s 50M {} \;)
  local -a stale_xml=(/tmp -name "*.xml" -mtime +1 -delete)

  printf '%s\n' "$(date): Performing cleanup tasks" >>"$LOG_FILE"
  find "${oversized_logs[@]}"
  find "${stale_xml[@]}"
}
#######################################
# Dispatch one automation task by name.
# Globals:   none
# Arguments: $1 - health | backup | optimize | cleanup | all
# Outputs:   usage text on stdout for an unknown task
# Returns:   status of the last task run; exits the script with 1 for an
#            unknown task
#######################################
main() {
  local task

  if [ "$1" = health ]; then
    health_check
  elif [ "$1" = backup ]; then
    auto_backup
  elif [ "$1" = optimize ]; then
    performance_optimization
  elif [ "$1" = cleanup ]; then
    cleanup
  elif [ "$1" = all ]; then
    # Same order as the individual tasks are listed above.
    for task in health_check auto_backup performance_optimization cleanup; do
      "$task"
    done
  else
    echo "Usage: $0 {health|backup|optimize|cleanup|all}"
    exit 1
  fi
}
# Forward the command-line arguments unchanged to the dispatcher.
main "$@"
Conclusion
Deploying a KVM virtualization platform in production involves careful hardware selection, kernel tuning, flexible network design, reliable storage configuration, comprehensive monitoring, and automation. Following the practical steps and scripts provided enables operators to build a stable, high‑performance virtualized infrastructure and streamline ongoing operations.
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
Ops Community
A leading IT operations community where professionals share and grow together.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
