Files
vm-bhyve/lib/vm-run

684 lines
22 KiB
Plaintext
Raw Normal View History

#!/bin/sh
#-------------------------------------------------------------------------+
# Copyright (C) 2015 Matt Churchyard (churchers@gmail.com)
# All rights reserved
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted providing that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# 'vm _run'
# run a virtual machine
# this is the background process that does all the work
# in most cases this should not be run directly
#
# Reads the guest configuration, builds the bhyve option/device strings,
# locks the guest, then runs the guest loader followed by bhyve in a loop
# for as long as bhyve exits with status 0 (guest reboot). On any other
# status the tap devices created for this guest are destroyed, the vmm
# device is removed and the lock is released.
#
# This function does not return; it always ends the process via 'exit':
#   10 - no guest name, no configuration file, or guest not confirmed stopped
#   15 - a configuration/environment problem was logged before booting,
#        or an existing vmm device could not be destroyed
#   otherwise the last loader or bhyve exit status
#
# @param string _name the name of the guest to run
# @param optional string _iso the iso file for an install
#
__vm_run(){
    local _name="$1"
    local _iso="$2" _iso_dev
    local _cpu _memory _bootdisk _bootdisk_dev _guest _wiredmem
    local _guest_support _uefi _uuid _utc _debug _hostbridge _loader
    local _opts _devices _slot=4 _func=0 _taplist _exit
    local _com _comports _comstring _logpath
    local _conf="${vm_dir}/${_name}/${_name}.conf"

    # bail out immediately if no guest or running
    if [ -z "${_name}" ] || [ ! -f "${_conf}" ]; then
        exit 10
    fi

    __vm_confirm_stopped "${_name}" "1" || exit 10

    __config_load "${_conf}"
    __config_get "_cpu" "cpu"
    __config_get "_memory" "memory"
    __config_get "_guest" "guest"
    __config_get "_loader" "loader"
    __config_get "_bootdisk" "disk0_name"
    __config_get "_bootdisk_dev" "disk0_dev"
    __config_get "_uefi" "uefi"
    __config_get "_hostbridge" "hostbridge"
    __config_get "_comports" "comports"
    __config_get "_uuid" "uuid"
    __config_get "_utc" "utctime"
    __config_get "_debug" "debug"

    # set defaults
    : ${_guest:=generic}
    : ${_bootdisk_dev:=file}
    : ${_comports:=com1}
    : ${_uefi:=no}
    : ${_utc:=no}
    : ${_debug:=no}
    : ${_hostbridge:=standard}

    __log "guest" "${_name}" "initialising"
    __log "guest" "${_name}" " [guest: ${_guest}]"
    __log "guest" "${_name}" " [loader: ${_loader:-default}]"
    __log "guest" "${_name}" " [uefi: ${_uefi}]"
    __log "guest" "${_name}" " [cpu: ${_cpu}]"
    __log "guest" "${_name}" " [memory: ${_memory}]"
    __log "guest" "${_name}" " [hostbridge: ${_hostbridge}]"
    __log "guest" "${_name}" " [com ports: ${_comports}]"
    __log "guest" "${_name}" " [uuid: ${_uuid:-auto}]"
    __log "guest" "${_name}" " [utctime: ${_utc}]"
    __log "guest" "${_name}" " [debug mode: ${_debug}]"
    __log "guest" "${_name}" " [primary disk: ${_bootdisk}]"
    __log "guest" "${_name}" " [primary disk dev: ${_bootdisk_dev}]"

    # check basic settings
    if [ -z "${_guest}" ] || [ -z "${_cpu}" ] || [ -z "${_memory}" ] || [ -z "${_bootdisk}" ]; then
        __log "guest" "${_name}" "unable to start - missing required configuration"
        exit 15
    fi

    # default bhyve options
    _opts="-AHP"

    # if uefi, make sure we have bootrom, then update options for uefi support
    if __checkyesno "${_uefi}"; then
        if [ "${BSD_VERSION}" -lt 1002509 ]; then
            __log "guest" "${_name}" "uefi guests can only be run on FreeBSD 10.3 or newer"
            exit 15
        fi

        if [ "${_uefi}" = "csm" ]; then
            if [ ! -e "${vm_dir}/.config/BHYVE_UEFI_CSM.fd" ]; then
                __log "guest" "${_name}" "please download uefi csm firmware to ${vm_dir}/.config/BHYVE_UEFI_CSM.fd"
                exit 15
            fi

            _opts="-Hwl bootrom,${vm_dir}/.config/BHYVE_UEFI_CSM.fd"
        else
            if [ ! -e "${vm_dir}/.config/BHYVE_UEFI.fd" ]; then
                __log "guest" "${_name}" "please download uefi firmware to ${vm_dir}/.config/BHYVE_UEFI.fd"
                exit 15
            fi

            _opts="-Hwl bootrom,${vm_dir}/.config/BHYVE_UEFI.fd"
        fi
    else
        # from here on an empty _uefi means "not a uefi guest"
        _uefi=""
    fi

    # try to find guest support
    _guest_support=$(type "__guest_${_guest}" 2>&1 | grep "shell function")

    if [ -z "${_guest_support}" ]; then
        __log "guest" "${_name}" "unsupported guest type: ${_guest}"
        exit 15
    fi

    # set uuid in opts
    [ -n "${_uuid}" ] && _opts="${_opts} -U ${_uuid}"

    # set utc time in opts if requested
    if __checkyesno "${_utc}"; then
        if [ "${BSD_VERSION}" -ge 1002000 ]; then
            _opts="${_opts} -u"
        else
            __log "guest" "${_name}" "utc time requested but not available pre FreeBSD 10.2"
        fi
    fi

    # see where to send bhyve output
    if __checkyesno "${_debug}"; then
        _logpath="${vm_dir}/${_name}/bhyve.log"
    else
        _logpath="/dev/null"
    fi

    # add cpu and memory to opts so we can see all bhyve command in logs
    _opts="-c ${_cpu} -m ${_memory} ${_opts}"

    # complete the boot disk path
    __vm_get_disk_path "_bootdisk" "${_name}" "${_bootdisk}" "${_bootdisk_dev}"

    # build bhyve device string
    # the helpers below work on our local variables (_devices, _slot, _func, ...)
    __vm_bhyve_device_comports
    __vm_bhyve_device_basic
    __vm_bhyve_device_disks
    __vm_bhyve_device_networking
    __vm_bhyve_device_rand
    __vm_bhyve_device_passthru

    __vm_lock "${_name}"
    __log "guest" "${_name}" "booting"

    while :; do

        # destroy existing vmm
        # freebsd seems happy to run a bhyveload/bhyve loop
        # grub-bhyve doesn't seem to like it
        # Peter says don't destroy in Windows instructions, so don't if in UEFI mode
        if [ -e "/dev/vmm/${_name}" ] && [ -z "${_uefi}" ]; then
            bhyvectl --vm="${_name}" --destroy >/dev/null 2>&1

            if [ $? -ne 0 ]; then
                __log "guest" "${_name}" "failed to destroy existing vmm device"
                _exit=15
                break
            fi
        fi

        # load guest
        # the guest type was confirmed to be a shell function above, so call it directly
        if [ -n "${_iso}" ]; then
            _iso_dev="-s 3:0,ahci-cd,${vm_dir}/.iso/${_iso}"
            "__guest_${_guest}" "install"
        else
            # always provide a cd for non-csm UEFI guests at the moment
            if [ -n "${_uefi}" ] && [ "${_uefi}" != "csm" ]; then
                _iso_dev="-s 3:0,ahci-cd,${vm_dir}/.config/null.iso"
            fi

            "__guest_${_guest}" "run"
        fi

        # check no errors
        # _exit is presumably set by the guest function; treat "not set" as success
        # rather than letting test(1) complain about a missing operand
        if [ "${_exit:-0}" -ne 0 ]; then
            __log "guest" "${_name}" "loader returned error ${_exit}"
            break
        fi

        __log "guest" "${_name}" " [bhyve options: ${_opts}]"
        __log "guest" "${_name}" " [bhyve devices: ${_devices}]"
        __log "guest" "${_name}" " [bhyve console: ${_comstring}]"
        [ -n "${_iso_dev}" ] && __log "guest" "${_name}" " [bhyve iso device: ${_iso_dev}]"
        __log "guest" "${_name}" "starting bhyve"

        # actually run bhyve!
        # we're already in the background so we just wait for it to exit
        # the option strings are deliberately unquoted so they split into arguments
        bhyve ${_opts} \
            ${_devices} \
            ${_iso_dev} \
            ${_comstring} \
            "${_name}" >>"${_logpath}" 2>&1

        # get bhyve exit code
        _exit=$?
        __log "guest" "${_name}" "bhyve exited with status ${_exit}"

        # if 0, guest rebooted so continue loop
        # anything else we break and shutdown
        [ $_exit -ne 0 ] && break
        __log "guest" "${_name}" "restarting"

        # remove install iso so guest reboots from disk
        # after install non-uefi guests will still get install cd until a full shutdown+restart
        # as we don't reset _iso_dev
        _iso=""
    done

    # destroy taps
    # _devices is no longer needed for bhyve, so it is reused as the loop variable
    for _devices in ${_taplist}; do
        __log "guest" "${_name}" "destroying network device ${_devices}"
        ifconfig "${_devices}" destroy
    done

    __log "guest" "${_name}" "stopped"
    bhyvectl --destroy --vm="${_name}"
    __vm_unlock "${_name}"
    exit ${_exit}
}
# lock a vm
# the lock is a 'run.lock' file in the guest directory containing the
# output of hostname(1), so vm-bhyve can tell the user which host
# holds the lock and stop a second instance being started on this or
# another host
#
# @param string - the name of the guest to lock
#
__vm_lock(){
    local _guest="$1"
    local _lockfile="${vm_dir}/${_guest}/run.lock"

    hostname > "${_lockfile}"
}
# unlock a vm
# removes the 'run.lock' file from the guest directory; any output
# from unlink(1) is discarded and its exit status is returned
#
# @param string - the name of the guest to unlock
#
__vm_unlock(){
    local _lockfile="${vm_dir}/$1/run.lock"

    unlink "${_lockfile}" >/dev/null 2>&1
}
# create string for guest com ports
# for every port listed in _comports a free nmdm device is looked up and
# '-l comX,/dev/nmdmNA' is appended to _comstring.
# _com is used by bhyveload|grub_bhyve, so it is set to the A side of the
# first com port we come across.
# The B side of each nmdm device is written to $vm_dir/{guest}/console
# (one 'comX=/dev/nmdmNB' line per port) so it can be read back later
# for the 'vm console' command. Any existing console file is removed first.
#
# @modifies _com _comstring
#
__vm_bhyve_device_comports(){
    local _port _nmdm=""
    local _first="yes"
    local _console="${vm_dir}/${_name}/console"

    # start with a fresh console list
    unlink "${_console}" >/dev/null 2>&1

    for _port in ${_comports}; do
        # pass in the previous device number so the search continues after it
        __vm_find_console "_nmdm" "${_nmdm}"

        # use first com port for the loader
        if [ -n "${_first}" ]; then
            _com="/dev/nmdm${_nmdm}A"
            _first=""
        fi

        printf '%s\n' "${_port}=/dev/nmdm${_nmdm}B" >> "${_console}"
        _comstring="${_comstring:+${_comstring} }-l ${_port},/dev/nmdm${_nmdm}A"
    done
}
# get bhyve device string for basic devices
# hostbridge & lpc on their own slots
# windows requires slot 0 & 31, nothing else cares fortunately
#
# _hostbridge selects the bridge on slot 0:
#   no*   - no hostbridge (anything starting with "no"); _devices is left as it was
#   amd   - amd_hostbridge
#   other - the standard hostbridge
# the lpc device on slot 31 is always appended
#
# @modifies _devices
#
__vm_bhyve_device_basic(){

    # add hostbridge
    case "$_hostbridge" in
        no*)
            ;;
        amd)
            _devices="-s 0,amd_hostbridge"
            ;;
        *)
            _devices="-s 0,hostbridge"
            ;;
    esac

    # lpc
    _devices="${_devices}${_devices:+ }-s 31,lpc"
}
# get bhyve device string for disk devices
# read all disks starting at disk0 and add them to the _devices string,
# stopping at the first diskX that has no name or no type.
# this is done first so disks will start at slot 4.
# For uefi, every disk gets its own slot and we stop once slot 6 has
# been used. For non-uefi we step through the functions of a slot,
# moving on to the next slot after function 7, and just keep going.
#
# @modifies _devices _slot _func
#
__vm_bhyve_device_disks(){
    local _disk _type _dev _path _opts
    local _num=0

    # get disks
    while :; do
        __config_get "_disk" "disk${_num}_name"
        __config_get "_type" "disk${_num}_type"
        __config_get "_dev" "disk${_num}_dev"
        __config_get "_opts" "disk${_num}_opts"

        # break if disk not found
        if [ -z "${_disk}" ] || [ -z "${_type}" ]; then
            break
        fi

        # we need to move slot if we've hit function 8
        if [ ${_func} -ge 8 ]; then
            _slot=$((_slot + 1))
            _func=0
        fi

        __vm_get_disk_path "_path" "${_name}" "${_disk}" "${_dev}"

        # per-disk options, if any, are appended after the path
        _devices="${_devices} -s ${_slot}:${_func},${_type},${_path}${_opts:+,${_opts}}"

        if [ -z "${_uefi}" ]; then
            _func=$((_func + 1))
        else
            _slot=$((_slot + 1))

            # can't go past slot 6 with current UEFI firmware
            if [ ${_slot} -ge 7 ]; then
                __log "guest" "${_name}" "ending disks at disk${_num} due to UEFI firmware limitations"
                break
            fi
        fi

        _num=$((_num + 1))
    done

    # if we have any disks, move next devices to new slot
    # unless uefi mode, because we'll have incremented slot anyway
    if [ ${_num} -ge 1 ] && [ -z "${_uefi}" ]; then
        _slot=$((_slot + 1))
        _func=0
    fi
}
# get bhyve device string for networking
# interfaces are read from networkX_* settings starting at network0,
# stopping at the first one that has no type.
# for vale switches we just generate a vale port id.
# otherwise we dynamically create a new tap device for each interface
# (unless a custom device is configured) and, if we can find the correct
# bridge, add the tap as a member.
# we add each tap we created to _taplist from __vm_run, which it will
# use to destroy them all on shutdown. custom devices are not added.
#
# @modifies _devices _slot _func _taplist
#
__vm_bhyve_device_networking(){
    local _type _switch _mac _custom_tap _tap _sid _mtu
    local _num=0

    while [ 1 ]; do
        __config_get "_type" "network${_num}_type"
        __config_get "_switch" "network${_num}_switch"
        __config_get "_mac" "network${_num}_mac"
        __config_get "_custom_tap" "network${_num}_device"

        # we need at least a type
        [ -z "${_type}" ] && break

        # set a static mac if we don't have one
        # (the generator works on our _mac and _num variables)
        [ -z "${_mac}" ] && __vm_generate_static_mac

        if __switch_is_vale "${_switch}"; then

            # move slot if we've hit function 8
            if [ ${_func} -ge 8 ]; then
                _func=0
                _slot=$((_slot + 1))
            fi

            # create a vale port id
            __switch_vale_port "_tap" "${_switch}" "${_mac}"

            __log "guest" "${_name}" "adding vale interface ${_tap} (${_switch})"
            _devices="${_devices} -s ${_slot}:${_func},${_type},${_tap}"
            [ -n "${_mac}" ] && _devices="${_devices},mac=${_mac}"
            _func=$((_func + 1))
        else

            # create interface
            if [ -n "${_custom_tap}" ]; then
                _tap="${_custom_tap}"
            else
                _tap=$(ifconfig tap create)
            fi

            if [ -n "${_tap}" ]; then

                # move slot if we've hit function 8
                if [ ${_func} -ge 8 ]; then
                    _func=0
                    _slot=$((_slot + 1))
                fi

                __log "guest" "${_name}" "initialising network device ${_tap}"
                ifconfig "${_tap}" description "vmnet-${_name}-${_num}-${_switch:-custom}" >/dev/null 2>&1

                if [ -n "${_switch}" ]; then
                    __switch_get_ident "_sid" "${_switch}"

                    if [ -n "${_sid}" ]; then
                        # the mtu is taken from the last field of the first line of ifconfig output
                        _mtu=$(ifconfig "${_sid}" | head -n1 | awk '{print $NF}')

                        # only touch the tap if we actually read a non-default mtu
                        if [ -n "${_mtu}" ] && [ "${_mtu}" != "1500" ]; then
                            __log "guest" "${_name}" "setting mtu of ${_tap} to ${_mtu}"
                            ifconfig "${_tap}" mtu "${_mtu}" >/dev/null 2>&1
                        fi

                        __log "guest" "${_name}" "adding ${_tap} -> ${_sid} (${_switch})"
                        ifconfig "${_sid}" addm "${_tap}" >/dev/null 2>&1
                        [ $? -ne 0 ] && __log "guest" "${_name}" "failed to add ${_tap} to ${_sid}"
                    else
                        __log "guest" "${_name}" "failed to find virtual switch '${_switch}'"
                    fi
                fi

                _devices="${_devices} -s ${_slot}:${_func},${_type},${_tap}"
                [ -n "${_mac}" ] && _devices="${_devices},mac=${_mac}"
                _func=$((_func + 1))

                # only taps we created ourselves get destroyed on shutdown
                [ -z "${_custom_tap}" ] && _taplist="${_taplist}${_taplist:+ }${_tap}"
            else
                # the interface is skipped, but say so rather than dropping it silently
                __log "guest" "${_name}" "failed to create tap device for network${_num}"
            fi
        fi

        _num=$((_num + 1))
    done

    # if we have any interfaces, move next devices to new slot
    if [ ${_num} -ge 1 ]; then
        _slot=$((_slot + 1))
        _func=0
    fi
}
# check if user wants a virtio-rand device
# when the 'virt_random' setting is enabled, a virtio-rnd device is
# added on function 0 of the current slot and the slot is advanced
#
# @modifies _devices _slot
#
__vm_bhyve_device_rand(){
    local _rand

    __config_get "_rand" "virt_random"

    # nothing to do unless explicitly enabled
    __checkyesno "${_rand}" || return 0

    _devices="${_devices} -s ${_slot}:0,virtio-rnd"
    _slot=$((_slot + 1))
}
# get any pci passthrough devices
# devices are read from passthruX settings starting at passthru0,
# stopping at the first one that is not set. each value is a host
# selector of three '/' separated numbers, the last being the function.
# if we have any pass through devices and BSD_VERSION is 1003000 or
# newer, bhyve is asked for wired memory by adding -S to _opts
#
# @modifies _devices _slot _opts _wiredmem
#
__vm_bhyve_device_passthru(){
    local _dev _orig_slot _orig_func
    local _last_orig_slot
    local _num=0

    while [ 1 ]; do
        __config_get "_dev" "passthru${_num}"
        [ -z "${_dev}" ] && break

        # everything before the final '/function' identifies the host device.
        # comparing only the first number would put different devices that
        # share it on the same guest slot, so keep both leading numbers
        _orig_slot=${_dev%/*}
        _orig_func=${_dev##*/}

        # only move to new slot if the original device is on a different slot to the last one.
        # if user wants to passthru a device that has multiple functions which must stay together
        # on one slot, they should be together in configuration file
        if [ -n "${_last_orig_slot}" ] && [ "${_last_orig_slot}" != "${_orig_slot}" ]; then
            _slot=$((_slot + 1))
        fi

        # we use the original function number for all devices.
        # does this work if you only pass through function 1 or 2, and leave 0 empty?
        _devices="${_devices} -s ${_slot}:${_orig_func},passthru,${_dev}"

        _last_orig_slot=${_orig_slot}
        _num=$((_num + 1))
    done

    if [ ${_num} -ge 1 ]; then
        _slot=$((_slot + 1))

        # add wired memory for 10.3+
        if [ "${BSD_VERSION}" -ge 1003000 ]; then
            _opts="${_opts} -S"
            _wiredmem="1"
        fi
    fi
}
# get the path to a disk image depending on type.
# the disk name in configuration is usually the name of the file
# or zvol, directly under the guest directory/dataset.
# if a user wants the disk image anywhere else, they can specify
# the following
# diskX_dev="custom"
# diskX_name="/full/path/to/disk/device/or/file"
#
# resulting paths:
#   zvol|sparse-zvol  /dev/zvol/$VM_ZFS_DATASET/{guest}/{disk}
#   custom            {disk} exactly as given
#   anything else     $vm_dir/{guest}/{disk}
#
# @param string _var variable to put disk path into
# @param string _name the name of the guest
# @param string _disk the name of the disk
# @param string _disk_dev=file|zvol|sparse-zvol|custom type of device
#
__vm_get_disk_path(){
    local _var="$1"
    local _name="$2"
    local _disk="$3"
    local _disk_dev="$4"

    case "${_disk_dev}" in
        zvol|sparse-zvol)
            # need to look at this, don't really want to reference VM_ZFS* variables here
            setvar "${_var}" "/dev/zvol/${VM_ZFS_DATASET}/${_name}/${_disk}"
            ;;
        custom)
            setvar "${_var}" "${_disk}"
            ;;
        *)
            setvar "${_var}" "${vm_dir}/${_name}/${_disk}"
            ;;
    esac
}
# find a spare nmdm device for a vm
# the result is the first number N, counting up from the start point,
# for which the listing of /dev has no entry matching "nmdmNA".
# for guests that have multiple consoles, we won't yet have opened any
# of them, so the caller can pass in the number of the previous console
# and the search starts at the one after it. otherwise we'd find the
# same number again
#
# @param string _var variable to put value into
# @param optional int _num the previous device number; search starts at _num + 1 (default = start at 0)
#
__vm_find_console(){
    local _var="$1"
    local _num="$2"
    local _ls

    # no previous console means start from the beginning
    if [ -z "${_num}" ]; then
        _num=0
    else
        _num=$((_num + 1))
    fi

    # loop until we find an available nmdm
    # using -e seemed to create devices so now scanning ls
    while _ls=$(ls -1 /dev | grep "nmdm${_num}A"); [ -n "${_ls}" ]; do
        _num=$((_num + 1))
    done

    # set value
    setvar "${_var}" "${_num}"
}
# get a list of all running virtual machines
# this loads a list of all running guests into global variables
# both variables are in the following format with one guest per line
# "pid guest"
# the guest name is taken to be the last word of the matched command line
#
# VM_RUN_BHYVE = all running bhyve instances
# VM_RUN_LOAD = all loading guests (bhyveload|grub-bhyve)
#
# @modifies VM_RUN_BHYVE VM_RUN_LOAD
#
__vm_running_load(){
    VM_RUN_BHYVE=$(pgrep -fl "bhyve:" | awk '{print $1,$NF}')
    VM_RUN_LOAD=$(pgrep -fl "grub-bhyve|bhyveload" | awk '{print $1,$NF}')
}
# see if a specific virtual machine is running
# we search the VM_RUN_BHYVE and then the VM_RUN_LOAD variable (one
# "pid guest" entry per line) looking for the specified guest.
# If found, the result is "Running (pid)" or "Bootloader (pid)".
# If not found in either list, the result is "Stopped"
#
# @param string _var variable to put result into
# @param string _name name of the guest to look for
# @return success if running
#
__vm_running_check(){
    local _var="$1"
    local _name="$2"
    local IFS=$'\n'
    local _entry

    # entries are split on newlines only, so each one is a whole "pid guest" line
    for _entry in ${VM_RUN_BHYVE}; do
        [ "${_entry##* }" = "${_name}" ] || continue

        setvar "${_var}" "Running (${_entry%% *})"
        return 0
    done

    for _entry in ${VM_RUN_LOAD}; do
        [ "${_entry##* }" = "${_name}" ] || continue

        setvar "${_var}" "Bootloader (${_entry%% *})"
        return 0
    done

    setvar "${_var}" "Stopped"
    return 1
}
# make sure a virtual machine is not running
# display warning if it is. up to caller to decide whether to continue.
# a guest counts as running if its run.lock file exists, or if
# /dev/vmm/{_name} exists on this machine.
# on unclean shutdown, lock files may not be cleared up by vm-bhyve,
# so the lock file is ignored when _skip_lock is "1" and the first line
# of the lock matches the output of hostname(1) on this host. The
# /dev/vmm/{_name} check is still made in that case.
#
# @param string _name the guest name
# @param int _skip_lock skip local lock file if no /dev/vmm found
# @return success if not running
#
__vm_confirm_stopped(){
    local _name="$1"
    local _skip_lock="$2"
    local _host _our_host
    local _lock="${vm_dir}/${_name}/run.lock"

    _our_host=$(hostname)

    # check vm-bhyve lock
    # this will err even if guest is running on another node
    if [ -e "${_lock}" ]; then
        _host=$(head -n 1 "${_lock}")

        # the lock is only ignored if it is ours AND the caller asked to skip it
        if ! { [ "${_host}" = "${_our_host}" ] && [ "${_skip_lock}" = "1" ]; }; then
            __warn "${_name} appears to be running on ${_host} (locked)"
            return 1
        fi
    fi

    # check local machine, just in case guest run manually
    if [ -e "/dev/vmm/${_name}" ]; then
        __warn "${_name} appears to be running locally (vmm exists)"
        return 1
    fi

    return 0
}
# generate a static mac address for a guest.
# we now make sure all interfaces have a static mac so
# there's no worry of guest problems due to it changing.
# bhyve is allocated 58:9c:fc:0x:xx:xx, so we try and
# randomise the last 20 bits (5 hex digits).
# using guest name, interface number and time as seed,
# with an incrementing integer just in case we happen to
# get 5 zeros.
#
# this works on the caller's variables: _name and _num are read,
# the new address is put in _mac and also saved to the guest
# configuration as network{_num}_mac
#
# @modifies _mac
#
__vm_generate_static_mac(){
    local _time
    local _key _part="0:00:00"
    local _base _int=0

    _time=$(date +%s)
    _base="${_name}:${_num}:${_time}"

    # generate the last 20 bits as x:xx:xx from the first 5 hex digits of the md5 of the key
    # make sure we don't get 0:00:00 (see sys/net/ieee_oui.h)
    while [ "${_part}" = "0:00:00" ]; do
        _key="${_base}:${_int}"
        _part=$(md5 -qs "${_key}" | awk 'BEGIN {FS="";OFS=":"}; {print $1,$2$3,$4$5}')
        _int=$((_int + 1))
    done

    # add base bhyve OUI
    _mac="58:9c:fc:0${_part}"

    __log "guest" "${_name}" "generated static mac ${_mac} (based on '${_key}')"
    __vm_config_set "${_name}" "network${_num}_mac" "${_mac}"
}