The Nova API implementation, analyzed through the instance-creation flow
1. First, be clear that adding a new API to nova is straightforward: the HTTP request handling layer is taken care of for us, so you simply follow the existing pattern; the real work is the logic inside your own API (a minimal sketch follows point 2 below).
2. All of the API files live under the directory 'nova/api/openstack/compute'; as of the Mitaka (M) release the directory 'nova/api/openstack/compute/contrib' no longer exists.
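To make point 1 concrete, here is a rough sketch of what a new plugin-style v2.1 API could look like in this layout. Everything specific in it is hypothetical (the 'my-things' alias, MyThingsController, MyThings); a real plugin also needs an entry point in setup.cfg and a policy rule, and details differ between releases.

from nova.api.openstack import extensions
from nova.api.openstack import wsgi

ALIAS = 'my-things'  # hypothetical resource name


class MyThingsController(wsgi.Controller):
    """Only the handler logic needs to be written; routing, auth and
    (de)serialization are handled by the surrounding framework."""

    @extensions.expected_errors(404)
    def index(self, req):
        return {'my_things': []}


class MyThings(extensions.V21APIExtensionBase):
    """Plugin glue that registers the resource with the v2.1 router."""

    name = 'MyThings'
    alias = ALIAS
    version = 1

    def get_resources(self):
        return [extensions.ResourceExtension(ALIAS, MyThingsController())]

    def get_controller_extensions(self):
        return []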
3. Analysis of the flow, taking instance creation as the example:
REQ: curl -i 'http://192.168.1.9:8774/v2/14fd316568bc4f6992ba161fd4e23001/servers' -X POST -H "X-Auth-Project-Id: 14fd316568bc4f6992ba161fd4e23001"
-H "User-Agent: python-novaclient" -H "Content-Type: application/json" -H "Accept: application/json" -H "X-Auth-Token: 3b9da36bfcf4491f88994b25c3045f95"
-d '{"server": {"name": "createVm", "imageRef": "db281a42-df86-4a3d-a8df-1af7b2eb80bc", "availability_zone": "nova", "key_name": "keyName",
"flavorRef": "8", "max_count": 1, "min_count": 1, "networks": [{"port": "406b40b4-6b66-47d9-a943-9e341aeae6ef"}]}}'
3.1 When the nova-api service receives this request, it can quickly be traced to the file 'nova/api/openstack/compute/servers.py'
# vim nova/api/openstack/compute/servers.py
class ServersController(wsgi.Controller):
"""The Server API base controller class for the OpenStack API."""
# These namespaces (matching entry points defined in setup.cfg) are loaded as API extensions via stevedore.enabled.EnabledExtensionManager (see the sketch after this class snippet)
EXTENSION_CREATE_NAMESPACE = 'nova.api.v21.extensions.server.create'
EXTENSION_REBUILD_NAMESPACE = 'nova.api.v21.extensions.server.rebuild'
EXTENSION_UPDATE_NAMESPACE = 'nova.api.v21.extensions.server.update'
EXTENSION_RESIZE_NAMESPACE = 'nova.api.v21.extensions.server.resize'
# NOTE: the directory 'nova/api/openstack/compute' contains two sub-directories, 'schemas' and 'views'; their contents are wired in here and used when an instance is created
_view_builder_class = views_servers.ViewBuilderV21
schema_server_create = schema_servers.base_create
schema_server_update = schema_servers.base_update
schema_server_rebuild = schema_servers.base_rebuild
schema_server_resize = schema_servers.base_resize
schema_server_create_v20 = schema_servers.base_create_v20
schema_server_update_v20 = schema_servers.base_update_v20
schema_server_rebuild_v20 = schema_servers.base_rebuild_v20
schema_server_create_v219 = schema_servers.base_create_v219
schema_server_update_v219 = schema_servers.base_update_v219
schema_server_rebuild_v219 = schema_servers.base_rebuild_v219
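A simplified, standalone sketch (not nova's exact code) of how one of these entry-point namespaces is loaded with stevedore; the check function here accepts everything, whereas nova's check_func consults the configured extension whitelist/blacklist:

from stevedore import enabled


def _load_everything(ext):
    # nova decides here whether a discovered extension should be enabled
    return True


create_ext_manager = enabled.EnabledExtensionManager(
    namespace='nova.api.v21.extensions.server.create',
    check_func=_load_everything,
    invoke_on_load=True)

# Each loaded plugin then gets a chance to process part of the create()
# request, roughly: create_ext_manager.map(lambda ext: ext.obj.server_create(...))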
3.2 Enter the create() method itself:
@wsgi.response(202)
@extensions.expected_errors((400, 403, 409, 413))
@validation.schema(schema_server_create_v20, '2.0', '2.0')
@validation.schema(schema_server_create, '2.1', '2.18')
@validation.schema(schema_server_create_v219, '2.19')
def create(self, req, body):
"""Creates a new server for a given user."""
Before doing any real work the method checks the operation against the policy rules (policy.json) and reads parts of the request body; the @validation.schema decorators above have already validated the body against the JSON schemas (a simplified illustration follows this snippet). It then calls:
(instances, resv_id) = self.compute_api.create(context,
inst_type,
image_uuid,
display_name=name,
display_description=description,
availability_zone=availability_zone,
forced_host=host, forced_node=node,
metadata=server_dict.get('metadata', {}),
admin_password=password,
requested_networks=requested_networks,
check_server_group_quota=True,
**create_kwargs)
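The @validation.schema decorators are what pull in the 'schemas' directory: the request body is validated against a JSON schema chosen by microversion before create() runs. Below is a trimmed-down stand-in for that idea using the jsonschema library directly; it is not the real base_create schema.

import jsonschema

server_create_schema = {
    'type': 'object',
    'properties': {
        'server': {
            'type': 'object',
            'properties': {
                'name': {'type': 'string', 'minLength': 1, 'maxLength': 255},
                'flavorRef': {'type': ['string', 'integer']},
                'imageRef': {'type': 'string'},
            },
            'required': ['name', 'flavorRef'],
        },
    },
    'required': ['server'],
}

body = {'server': {'name': 'createVm', 'flavorRef': '8',
                   'imageRef': 'db281a42-df86-4a3d-a8df-1af7b2eb80bc'}}
# raises jsonschema.ValidationError for a malformed body, which nova maps to HTTP 400
jsonschema.validate(body, server_create_schema)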
3.3 Enter the file 'nova/compute/api.py':
# The hooks mechanism is interesting; you can do a lot with it (a sketch of such a hook follows this method)
@hooks.add_hook("create_instance")
def create(self, context, instance_type,
image_href, kernel_id=None, ramdisk_id=None,
min_count=None, max_count=None,
display_name=None, display_description=None,
key_name=None, key_data=None, security_group=None,
availability_zone=None, forced_host=None, forced_node=None,
user_data=None, metadata=None, injected_files=None,
admin_password=None, block_device_mapping=None,
access_ip_v4=None, access_ip_v6=None, requested_networks=None,
config_drive=None, auto_disk_config=None, scheduler_hints=None,
legacy_bdm=True, shutdown_terminate=False,
check_server_group_quota=False):
"""Provision instances, sending instance information to the
scheduler. The scheduler will determine where the instance(s)
go and will handle creating the DB entries.
Returns a tuple of (instances, reservation_id)
"""
# Why is the policy checked a second time here?
# Check policies up front to fail before performing more expensive work
self._check_create_policies(context, availability_zone,
requested_networks, block_device_mapping, forced_host,
forced_node)
if requested_networks and max_count > 1:
self._check_multiple_instances_and_specified_ip(requested_networks)
if utils.is_neutron():
self._check_multiple_instances_neutron_ports(
requested_networks)
if availability_zone:
available_zones = availability_zones.\
get_availability_zones(context.elevated(), True)
if forced_host is None and availability_zone not in \
available_zones:
msg = _('The requested availability zone is not available')
raise exception.InvalidRequest(msg)
# Build the filter_properties dict used later for host filtering
filter_properties = scheduler_utils.build_filter_properties(
scheduler_hints, forced_host, forced_node, instance_type)
# Continue into the method '_create_instance()'
return self._create_instance(
context, instance_type,
image_href, kernel_id, ramdisk_id,
min_count, max_count,
display_name, display_description,
key_name, key_data, security_group,
availability_zone, user_data, metadata,
injected_files, admin_password,
access_ip_v4, access_ip_v6,
requested_networks, config_drive,
block_device_mapping, auto_disk_config,
filter_properties=filter_properties,
legacy_bdm=legacy_bdm,
shutdown_terminate=shutdown_terminate,
check_server_group_quota=check_server_group_quota)
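About the @hooks.add_hook("create_instance") decorator noted above: a hook is a plain class registered under the 'nova.hooks' entry-point namespace in setup.cfg, and its pre()/post() methods run before and after the decorated call. A sketch of such a hook (package and class names are hypothetical):

# setup.cfg of your own package, assumed layout:
#   [entry_points]
#   nova.hooks =
#       create_instance = mypackage.hooks:CreateInstanceHook
import logging

LOG = logging.getLogger(__name__)


class CreateInstanceHook(object):
    """Runs around nova.compute.api.API.create()."""

    def pre(self, *args, **kwargs):
        # called before create(); args/kwargs are the call's arguments
        LOG.info("about to create instance(s)")

    def post(self, rv, *args, **kwargs):
        # called after create(); rv is the (instances, reservation_id) tuple
        LOG.info("finished creating instance(s)")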
3.4 Enter the method '_create_instance()'. Its docstring summarizes what it does:
def _create_instance(self, context, instance_type,
image_href, kernel_id, ramdisk_id,
min_count, max_count,
display_name, display_description,
key_name, key_data, security_groups,
availability_zone, user_data, metadata, injected_files,
admin_password, access_ip_v4, access_ip_v6,
requested_networks, config_drive,
block_device_mapping, auto_disk_config, filter_properties,
reservation_id=None, legacy_bdm=True, shutdown_terminate=False,
check_server_group_quota=False):
"""Verify all the input parameters regardless of the provisioning
strategy being performed and schedule the instance(s) for
creation.
"""
......
self.compute_task_api.build_instances(context,
instances=instances, image=boot_meta,
filter_properties=filter_properties,
admin_password=admin_password,
injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping,
legacy_bdm=False)
3.5 The next step is the call to self.compute_task_api.build_instances() shown at the end of 3.4.
3.5.1 First, let's look at 'self.compute_task_api'.
Following the code, self.compute_task_api comes from 'nova/conductor/api.py', which delegates to the class ComputeTaskAPI(object) in 'nova/conductor/rpcapi.py'.
Start with the rpcapi class's constructor:
def __init__(self):
super(ComputeTaskAPI, self).__init__()
target = messaging.Target(topic=CONF.conductor.topic,
namespace='compute_task',
version='1.0')
serializer = objects_base.NovaObjectSerializer()
self.client = rpc.get_client(target, serializer=serializer)
This brings in the 'oslo_messaging' module and RPC over a message queue. I won't expand on it here (it deserves a post of its own); the main thing to know is that the services talk to each other through this wrapped RPC message queue (RabbitMQ/ZeroMQ/...). A minimal standalone sketch of the pattern follows.
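For reference, a minimal standalone sketch of the oslo_messaging pattern that nova's rpc.get_client() wraps: build a transport from the configuration, point a Target at a topic, and cast() an asynchronous call. The topic, method name and arguments below are illustrative.

from oslo_config import cfg
import oslo_messaging as messaging

CONF = cfg.CONF

transport = messaging.get_transport(CONF)  # e.g. a rabbit:// URL from nova.conf
target = messaging.Target(topic='conductor', namespace='compute_task',
                          version='1.0')
client = messaging.RPCClient(transport, target)

# cast() is fire-and-forget: the caller does not wait for a result, which is
# why nova uses it for long-running operations such as building instances.
client.cast({}, 'build_instances', instances=[], image={})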
3.6 Now enter the file 'nova/conductor/api.py' and its build_instances() function:
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping, legacy_bdm=True):
self.conductor_compute_rpcapi.build_instances(context,
instances=instances, image=image,
filter_properties=filter_properties,
admin_password=admin_password, injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping,
legacy_bdm=legacy_bdm)
3.7 Enter the file 'nova/conductor/rpcapi.py' and its build_instances() function:
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping, legacy_bdm=True):
image_p = jsonutils.to_primitive(image)
version = '1.10'
if not self.client.can_send_version(version):
version = '1.9'
if 'instance_type' in filter_properties:
flavor = filter_properties['instance_type']
flavor_p = objects_base.obj_to_primitive(flavor)
filter_properties = dict(filter_properties,
instance_type=flavor_p)
kw = {'instances': instances, 'image': image_p,
'filter_properties': filter_properties,
'admin_password': admin_password,
'injected_files': injected_files,
'requested_networks': requested_networks,
'security_groups': security_groups}
if not self.client.can_send_version(version):
version = '1.8'
kw['requested_networks'] = kw['requested_networks'].as_tuples()
if not self.client.can_send_version('1.7'):
version = '1.5'
bdm_p = objects_base.obj_to_primitive(block_device_mapping)
kw.update({'block_device_mapping': bdm_p,
'legacy_bdm': legacy_bdm})
# the code above is mostly RPC version negotiation plus a little parameter repacking
# the two lines below are the key part:
# prepare the RPC client,
# then cast(): an asynchronous call, where the argument 'build_instances' names the method to invoke on the receiving side
cctxt = self.client.prepare(version=version)
cctxt.cast(context, 'build_instances', **kw)
3.8 Enter the file 'nova/conductor/manager.py'; the cast invokes its build_instances() function:
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping=None, legacy_bdm=True):
# TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
# 2.0 of the RPC API.
# TODO(danms): Remove this in version 2.0 of the RPC API
if (requested_networks and
not isinstance(requested_networks,
objects.NetworkRequestList)):
requested_networks = objects.NetworkRequestList(
objects=[objects.NetworkRequest.from_tuple(t)
for t in requested_networks])
# TODO(melwitt): Remove this in version 2.0 of the RPC API
flavor = filter_properties.get('instance_type')
if flavor and not isinstance(flavor, objects.Flavor):
# Code downstream may expect extra_specs to be populated since it
# is receiving an object, so lookup the flavor to ensure this.
flavor = objects.Flavor.get_by_id(context, flavor['id'])
filter_properties = dict(filter_properties, instance_type=flavor)
request_spec = {}
try:
# check retry policy. Rather ugly use of instances[0]...
# but if we've exceeded max retries... then we really only
# have a single instance.
# basic checks and parameter assembly for the scheduling filters
scheduler_utils.populate_retry(
filter_properties, instances[0].uuid)
request_spec = scheduler_utils.build_request_spec(
context, image, instances)
# run the scheduler filters to obtain the physical hosts that can boot these instances
# one physical host is picked for each instance
# the filtering rules themselves will be covered in a separate post
hosts = self._schedule_instances(
context, request_spec, filter_properties)
except Exception as exc:
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
for instance in instances:
self._set_vm_state_and_notify(
context, instance.uuid, 'build_instances', updates,
exc, request_spec)
self._cleanup_allocated_networks(
context, instance, requested_networks)
return
# loop over the instances and kick off their builds
# personally I suspect this could be an issue: with many instances the loop effectively queues them one after another and may take time, so some policy could be added here
# on reflection, though, the libvirt API is invoked per domain anyway, so the loop is needed; one idea is to optimize the case where several instances land on the same host (a toy sketch of that idea follows this method)
for (instance, host) in six.moves.zip(instances, hosts):
try:
instance.refresh()
except (exception.InstanceNotFound,
exception.InstanceInfoCacheNotFound):
LOG.debug('Instance deleted during build', instance=instance)
continue
local_filter_props = copy.deepcopy(filter_properties)
scheduler_utils.populate_filter_properties(local_filter_props,
host)
# The block_device_mapping passed from the api doesn't contain
# instance specific information
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
# host: the physical host this instance will be started on; the request is delivered to it over RPC
self.compute_rpcapi.build_and_run_instance(context,
instance=instance, host=host['host'], image=image,
request_spec=request_spec,
filter_properties=local_filter_props,
admin_password=admin_password,
injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=bdms, node=host['nodename'],
limits=host['limits'])
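Purely to illustrate the optimization idea in the note above (this is not nova code, and build_and_run_instance() stays a per-instance RPC): the scheduled (instance, host) pairs could first be grouped by target host, so instances headed for the same compute node can be handled as a batch.

import collections

import six


def group_by_host(instances, hosts):
    """Return {host_name: [instance, ...]} from the two parallel lists."""
    grouped = collections.defaultdict(list)
    for instance, host in six.moves.zip(instances, hosts):
        grouped[host['host']].append(instance)
    return grouped

# for host_name, host_instances in group_by_host(instances, hosts).items():
#     ... a single batched message per host could be sent here ...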
3.9 Now enter the file 'nova/compute/rpcapi.py' and find the function build_and_run_instance():
def build_and_run_instance(self, ctxt, instance, host, image, request_spec,
filter_properties, admin_password=None, injected_files=None,
requested_networks=None, security_groups=None,
block_device_mapping=None, node=None, limits=None):
version = '4.0'
# prepare the RPC client; the 'server' argument matters: it decides which physical host the message is sent to,
# in other words, which physical host the instance will be started on
cctxt = self.client.prepare(server=host, version=version)
# an asynchronous cast; it has to be asynchronous here, otherwise the loop above would be pointless
cctxt.cast(ctxt, 'build_and_run_instance', instance=instance,
image=image, request_spec=request_spec,
filter_properties=filter_properties,
admin_password=admin_password,
injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping, node=node,
limits=limits)
3.10 Enter the file 'nova/compute/manager.py'; the cast arrives at its build_and_run_instance() function:
@wrap_exception()
@reverts_task_state
@wrap_instance_fault
def build_and_run_instance(self, context, instance, image, request_spec,
filter_properties, admin_password=None,
injected_files=None, requested_networks=None,
security_groups=None, block_device_mapping=None,
node=None, limits=None):
@utils.synchronized(instance.uuid)
def _locked_do_build_and_run_instance(*args, **kwargs):
# NOTE(danms): We grab the semaphore with the instance uuid
# locked because we could wait in line to build this instance
# for a while and we want to make sure that nothing else tries
# to do anything with this instance while we wait.
with self._build_semaphore:
self._do_build_and_run_instance(*args, **kwargs)
# NOTE(danms): We spawn here to return the RPC worker thread back to
# the pool. Since what follows could take a really long time, we don't
# want to tie up RPC workers.
# this relies on the third-party library eventlet for high concurrency (a standalone sketch of the pattern follows this method)
# the first argument to spawn_n is the function it will execute
utils.spawn_n(_locked_do_build_and_run_instance,
context, instance, image, request_spec,
filter_properties, admin_password, injected_files,
requested_networks, security_groups,
block_device_mapping, node, limits)
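A small standalone sketch of the eventlet pattern used here: spawn_n() hands the long-running build to a green thread so the RPC worker returns to the pool, and a semaphore bounds how many builds run concurrently. The limit of 10 and the toy workload are illustrative (nova takes its limit from the max_concurrent_builds option).

import eventlet
eventlet.monkey_patch()

from eventlet import semaphore

build_semaphore = semaphore.Semaphore(10)


def do_build(instance_uuid):
    with build_semaphore:
        print('building %s' % instance_uuid)
        eventlet.sleep(1)  # stands in for the real build work


for fake_uuid in ('uuid-1', 'uuid-2', 'uuid-3'):
    # fire-and-forget: spawn_n discards the return value, like utils.spawn_n
    eventlet.spawn_n(do_build, fake_uuid)

eventlet.sleep(2)  # in this toy script, give the green threads time to finish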
3.11 Next, enter the method:
@hooks.add_hook('build_instance')
@wrap_exception()
@reverts_task_state
@wrap_instance_event
@wrap_instance_fault
def _do_build_and_run_instance(self, context, instance, image,
request_spec, filter_properties, admin_password, injected_files,
requested_networks, security_groups, block_device_mapping,
node=None, limits=None):
......
try:
with timeutils.StopWatch() as timer:
# execution then continues into this method
self._build_and_run_instance(context, instance, image,
decoded_files, admin_password, requested_networks,
security_groups, block_device_mapping, node, limits,
filter_properties)
LOG.info(_LI('Took %0.2f seconds to build instance.'),
timer.elapsed(), instance=instance)
return build_results.ACTIVE
3.12 Enter the method:
def _build_and_run_instance(self, context, instance, image, injected_files,
admin_password, requested_networks, security_groups,
block_device_mapping, node, limits, filter_properties):
image_name = image.get('name')
# emit a notification event over the messaging bus (a sketch of the notification side follows this method)
self._notify_about_instance_usage(context, instance, 'create.start',
extra_usage_info={'image_name': image_name})
try:
# get the resource tracker for the host given by 'node'
rt = self._get_resource_tracker(node)
# claim the resources on the host before the instance is actually started
with rt.instance_claim(context, instance, limits):
# NOTE(russellb) It's important that this validation be done
# *after* the resource tracker instance claim, as that is where
# the host is set on the instance.
# the group policy comes into play during scheduler filtering, so at this point the chosen
# host has to be checked against filter_properties
self._validate_instance_group_policy(context, instance,
filter_properties)
image_meta = objects.ImageMeta.from_dict(image)
# build the resources the instance needs,
# including the network and the block device mappings (BDMs)
with self._build_resources(context, instance,
requested_networks, security_groups, image_meta,
block_device_mapping) as resources:
instance.vm_state = vm_states.BUILDING
instance.task_state = task_states.SPAWNING
# NOTE(JoshNang) This also saves the changes to the
# instance from _allocate_network_async, as they aren't
# saved in that function to prevent races.
instance.save(expected_task_state=
task_states.BLOCK_DEVICE_MAPPING)
block_device_info = resources['block_device_info']
network_info = resources['network_info']
LOG.debug('Start spawning the instance on the hypervisor.',
instance=instance)
with timeutils.StopWatch() as timer:
# self.driver is the virtualization backend in use,
# selected through the configuration file
# now jump to 3.13
self.driver.spawn(context, instance, image_meta,
injected_files, admin_password,
network_info=network_info,
block_device_info=block_device_info)
LOG.info(_LI('Took %0.2f seconds to spawn the instance on '
'the hypervisor.'), timer.elapsed(),
instance=instance)
except ......
......
# If CONF.default_access_ip_network_name is set, grab the
# corresponding network and set the access ip values accordingly.
network_name = CONF.default_access_ip_network_name
if (network_name and not instance.access_ip_v4 and
not instance.access_ip_v6):
# Note that when there are multiple ips to choose from, an
# arbitrary one will be chosen.
for vif in network_info:
if vif['network']['label'] == network_name:
for ip in vif.fixed_ips():
if not instance.access_ip_v4 and ip['version'] == 4:
instance.access_ip_v4 = ip['address']
if not instance.access_ip_v6 and ip['version'] == 6:
instance.access_ip_v6 = ip['address']
break
self._update_instance_after_spawn(context, instance)
try:
instance.save(expected_task_state=task_states.SPAWNING)
except (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError) as e:
with excutils.save_and_reraise_exception():
self._notify_about_instance_usage(context, instance,
'create.end', fault=e)
self._update_scheduler_instance_info(context, instance)
self._notify_about_instance_usage(context, instance, 'create.end',
extra_usage_info={'message': _('Success')},
network_info=network_info)
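A rough sketch of the notification side referred to by the create.start/create.end calls: _notify_about_instance_usage() ultimately publishes events such as 'compute.instance.create.start' on the oslo_messaging notification bus. The publisher id, topic and payload fields below are illustrative.

from oslo_config import cfg
import oslo_messaging as messaging

CONF = cfg.CONF

transport = messaging.get_notification_transport(CONF)
notifier = messaging.Notifier(transport,
                              driver='messaging',
                              publisher_id='compute.node-1',
                              topics=['notifications'])

notifier.info({}, 'compute.instance.create.start',
              {'instance_id': 'some-uuid', 'image_name': 'some-image'})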
3.13 Enter the file 'nova/virt/libvirt/driver.py' and its spawn() method.
Keep in mind that this ultimately calls the libvirt API; what this layer really does is assemble the guest XML.
# NOTE(ilyaalekseyev): Implementation like in multinics
# for xenapi(tr3buchet)
def spawn(self, context, instance, image_meta, injected_files,
admin_password, network_info=None, block_device_info=None):
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance,
image_meta,
block_device_info)
# create the images the instance needs
# this is where the image-cache feature comes in
# the idea is simple: a directory is designated as the cache and every required image is stored there; the next time an instance boots from the same image,
# that directory is checked first, and if the image is already present it is not downloaded from the remote store again (a toy sketch follows this spawn() listing)
self._create_image(context, instance,
disk_info['mapping'],
network_info=network_info,
block_device_info=block_device_info,
files=injected_files,
admin_pass=admin_password)
# assemble the guest XML
# if you need to customize the instance's XML, this is the place to start
xml = self._get_guest_xml(context, instance, network_info,
disk_info, image_meta,
block_device_info=block_device_info,
write_to_disk=True)
# now enter this method, see 3.14
self._create_domain_and_network(context, xml, instance, network_info,
disk_info,
block_device_info=block_device_info)
LOG.debug("Instance is running", instance=instance)
def _wait_for_boot():
"""Called at an interval until the VM is running."""
state = self.get_info(instance).state
if state == power_state.RUNNING:
LOG.info(_LI("Instance spawned successfully."),
instance=instance)
raise loopingcall.LoopingCallDone()
timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
timer.start(interval=0.5).wait()
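A toy illustration of the image-cache idea mentioned inside _create_image() above. It is a heavy simplification of nova's imagebackend/imagecache code: fetch_from_glance is a hypothetical callable, and the cache path and file-naming scheme are only indicative.

import hashlib
import os

CACHE_DIR = '/var/lib/nova/instances/_base'


def get_cached_image(image_id, fetch_from_glance):
    """Return a path to the base image, downloading it only on a cache miss."""
    # the cached file name is derived from a hash of the image id
    fname = hashlib.sha1(image_id.encode('utf-8')).hexdigest()
    path = os.path.join(CACHE_DIR, fname)
    if not os.path.exists(path):
        # first boot from this image on this host: fetch it once
        fetch_from_glance(image_id, path)
    # later boots reuse the cached base image (e.g. as a qcow2 backing file)
    return path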
3.14 Enter the function:
def _create_domain_and_network(self, context, xml, instance, network_info,
disk_info, block_device_info=None,
power_on=True, reboot=False,
vifs_already_plugged=False):
"""Do required network setup and create domain."""
......
guest = None
try:
with self.virtapi.wait_for_instance_event(
instance, events, deadline=timeout,
error_callback=self._neutron_failed_callback):
self.plug_vifs(instance, network_info)
self.firewall_driver.setup_basic_filtering(instance,
network_info)
self.firewall_driver.prepare_instance_filter(instance,
network_info)
with self._lxc_disk_handler(instance, instance.image_meta,
block_device_info, disk_info):
# now continue to 3.15
guest = self._create_domain(
xml, pause=pause, power_on=power_on)
self.firewall_driver.apply_instance_filter(instance,
network_info)
3.15 Enter the function '_create_domain':
# TODO(sahid): Consider renaming this to _create_guest.
def _create_domain(self, xml=None, domain=None,
power_on=True, pause=False):
"""Create a domain.
Either domain or xml must be passed in. If both are passed, then
the domain definition is overwritten from the xml.
:returns guest.Guest: Guest just created
"""
if xml:
# reading on from here, you quickly see that the libvirt API being called is 'defineXML(xml)' (a minimal standalone sketch follows this function)
# param xml: XML domain definition of the guest.
# returns: a virDomain instance
guest = libvirt_guest.Guest.create(xml, self._host)
else:
guest = libvirt_guest.Guest(domain)
if power_on or pause:
guest.launch(pause=pause)
if not utils.is_neutron():
guest.enable_hairpin()
return guest
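A minimal standalone sketch of what Guest.create() plus guest.launch() boil down to with the libvirt Python bindings: define the persistent domain from the XML, then start it. The connection URI and the stripped-down XML are illustrative only; the XML produced by _get_guest_xml() is far richer.

import libvirt

conn = libvirt.open('qemu:///system')

xml = """
<domain type='kvm'>
  <name>createVm</name>
  <memory unit='MiB'>512</memory>
  <vcpu>1</vcpu>
  <os><type arch='x86_64'>hvm</type></os>
</domain>
"""

dom = conn.defineXML(xml)   # Guest.create() -> virDomainDefineXML
dom.createWithFlags(0)      # guest.launch() -> start the defined domain
print(dom.info())           # [state, maxMem, memory, nrVirtCpu, cpuTime]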
3.16 At this point the instance-creation flow is, in outline, complete. Afterwards, libvirt reports the instance's state back to the upper layers in the form of events, as sketched below.
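A small sketch of that event mechanism: the libvirt driver registers a lifecycle callback so that state changes (started, stopped, crashed, ...) are pushed back up instead of polled. Standalone illustration, not nova's own event-handling code.

import libvirt


def lifecycle_callback(conn, dom, event, detail, opaque):
    # event is e.g. VIR_DOMAIN_EVENT_STARTED / VIR_DOMAIN_EVENT_STOPPED ...
    print('domain %s lifecycle event %d (detail %d)'
          % (dom.name(), event, detail))


libvirt.virEventRegisterDefaultImpl()   # must be set up before opening the connection
conn = libvirt.open('qemu:///system')
conn.domainEventRegisterAny(None,
                            libvirt.VIR_DOMAIN_EVENT_ID_LIFECYCLE,
                            lifecycle_callback, None)

while True:
    libvirt.virEventRunDefaultImpl()    # dispatch pending libvirt events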