在RM的启动过程中有两个重要的步骤:1、服务初始化;2、启动服务。在初始化阶段,RM会将各个功能以服务的形式保存到自己的服务列表(ArrayList)中,并依次执行初始化操作。RM中几乎所有的重要组件都是以服务的形式提供的,RM对服务做了抽象,方便统一管理。下面我们先看RM的服务初始化阶段做了哪些事情。

/**
 * Entry point of the ResourceManager process: builds the configuration,
 * creates the ResourceManager, registers a shutdown hook for it, then
 * initializes and starts it. Any failure along the way is logged as fatal
 * and the JVM exits with status -1.
 *
 * @param argv command-line arguments, passed on to the startup/shutdown banner
 */
public static void main(String argv[]) {
  // Install the handler for exceptions that no thread catches.
  Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
  StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG);
  try {
    // Load the configuration.
    Configuration yarnConf = new YarnConfiguration();
    // Create an empty RM object: no services registered, nothing started yet.
    ResourceManager rm = new ResourceManager();
    // Register a shutdown hook for the RM.
    ShutdownHookManager hookManager = ShutdownHookManager.get();
    hookManager.addShutdownHook(
        new CompositeServiceShutdownHook(rm), SHUTDOWN_HOOK_PRIORITY);
    setHttpPolicy(yarnConf);
    // Initialize the services.
    rm.init(yarnConf);
    // Start the RM.
    rm.start();
  } catch (Throwable startupFailure) {
    LOG.fatal("Error starting ResourceManager", startupFailure);
    System.exit(-1);
  }
}


上述main函数中,我们主要分析服务初始化和服务启动这两步。RM是一个组合服务类,继承结构为CompositeService->AbstractService。RM初始化时会先进入父类的init函数;AbstractService抽取了服务的基本操作,如start、stop、close,我们的服务只要覆盖serviceInit、serviceStart、serviceStop等函数,就可以控制自己的初始化、启动和停止,这相当于对服务做了统一的管理。

/**
 * Initializes this service with the given configuration.
 * Returns immediately when the service is already in the INITED state;
 * otherwise stores the configuration and delegates to {@code serviceInit},
 * which subclasses override.
 *
 * @param conf the configuration to initialize with; must not be null
 * @throws ServiceStateException if {@code conf} is null, or wrapping any
 *         exception raised during initialization
 */
@Override
public void init(Configuration conf) {
  // A service cannot be initialized without a configuration.
  if (conf == null) {
    String message = "Cannot initialize service "
        + getName() + ": null configuration";
    throw new ServiceStateException(message);
  }
  // Nothing to do when the service has already been initialized.
  if (isInState(STATE.INITED)) {
    return;
  }
  synchronized (stateChangeLock) {
    // Re-check under the lock; skip the work when enterState reports INITED.
    if (enterState(STATE.INITED) == STATE.INITED) {
      return;
    }
    setConfig(conf);
    try {
      // Service-specific initialization; dispatches to the subclass
      // override of serviceInit (the RM's, in this walkthrough).
      serviceInit(config);
      // Notify the listeners only if the service is still INITED
      // once serviceInit has returned.
      if (isInState(STATE.INITED)) {
        notifyListeners();
      }
    } catch (Exception e) {
      // Record the failure, stop the service quietly, and rethrow.
      noteFailure(e);
      ServiceOperations.stopQuietly(LOG, this);
      throw ServiceStateException.convert(e);
    }
  }
}

RM的serviceInit负责初始化所需的各项服务:先创建相应的服务对象,再将其加入服务列表。

/**
 * Initializes the ResourceManager: validates the configuration, creates the
 * RM context, sets up the dispatcher and the admin service, records the HA
 * setting, creates and initializes the active services, and finally hands
 * over to the parent class's serviceInit.
 *
 * @param conf the configuration to initialize with
 * @throws Exception if any initialization step fails
 */
@Override
protected void serviceInit(Configuration conf) throws Exception {
  // Validate the configuration: yarn.resourcemanager.am.max-attempts,
  // and that expireIntvl >= heartbeatIntvl.
  validateConfigs(conf);
  this.conf = conf;
  // Create the RM context, which initializes internal data structures
  // such as applications, nodes and inactiveNodes.
  this.rmContext = new RMContextImpl();


  // register the handlers for all AlwaysOn services using setupDispatcher().
  // Create the asynchronous event dispatcher service. Internally it mainly
  // holds two data structures: (1) a blocking queue of events and (2) a map
  // from event type to handler. Registering an event puts the event type and
  // its handler into that map; note that one type may have several handlers,
  // in which case the handler type is multiHandler.
  rmDispatcher = setupDispatcher();
  // Add the dispatcher to the service list.
  addIfService(rmDispatcher);
  // Update the RM context object.
  rmContext.setDispatcher(rmDispatcher);


  // Create the admin service, which serves administrator operations:
  // queue refresh, node refresh, ACL control, etc.
  adminService = createAdminService();
  // Add it to the service list, as above.
  addService(adminService);
  // Update the RM context object, as above.
  rmContext.setRMAdminService(adminService);

  // HA options; not analyzed here.
  this.rmContext.setHAEnabled(HAUtil.isHAEnabled(conf));
  if (this.rmContext.isHAEnabled()) {
    HAUtil.verifyAndSetConfiguration(conf);
  }
  // Create and initialize the active services; this ends up calling the
  // serviceInit that initializes the individual services.
  createAndInitActiveServices();


  webAppAddress = WebAppUtils.getRMWebAppURLWithoutScheme(conf);


  super.serviceInit(conf);
}

RM中覆盖了父类的serviceInit函数,用来创建并初始化自身的各项服务:

/**
 * Creates the RM's services, adds them to the service list, registers the
 * event handlers on the RM dispatcher, publishes the services on the RM
 * context, and finally calls the parent class's serviceInit.
 *
 * NOTE(review): the {@code configuration} parameter is not read in this
 * body; every call uses {@code conf}, which is declared outside this block.
 *
 * @param configuration the configuration passed in by the caller (unused here)
 * @throws Exception if the state store fails to initialize or any other
 *         step fails
 */
@Override
  protected void serviceInit(Configuration configuration) throws Exception {
    conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);


    rmSecretManagerService = createRMSecretManagerService();
    addService(rmSecretManagerService);
    // Service that monitors container allocation timeouts.
    containerAllocationExpirer = new ContainerAllocationExpirer(rmDispatcher);
    addService(containerAllocationExpirer);
    rmContext.setContainerAllocationExpirer(containerAllocationExpirer);

    // ApplicationMaster liveliness monitoring.
    AMLivelinessMonitor amLivelinessMonitor = createAMLivelinessMonitor();
    addService(amLivelinessMonitor);
    rmContext.setAMLivelinessMonitor(amLivelinessMonitor);


    // A second monitor instance, registered on the context as the
    // "AM finishing" monitor.
    AMLivelinessMonitor amFinishingMonitor = createAMLivelinessMonitor();
    addService(amFinishingMonitor);
    rmContext.setAMFinishingMonitor(amFinishingMonitor);
    // HA-related: whether recovery is allowed after an RM failover.
    boolean isRecoveryEnabled = conf.getBoolean(
        YarnConfiguration.RECOVERY_ENABLED,
        YarnConfiguration.DEFAULT_RM_RECOVERY_ENABLED);


    // Use the store from the factory when recovery is enabled,
    // otherwise a NullRMStateStore.
    RMStateStore rmStore = null;
    if(isRecoveryEnabled) {
      recoveryEnabled = true;
      rmStore =  RMStateStoreFactory.getStore(conf);
    } else {
      recoveryEnabled = false;
      rmStore = new NullRMStateStore();
    }


    try {
      rmStore.init(conf);
      rmStore.setRMDispatcher(rmDispatcher);
    } catch (Exception e) {
      // the Exception from stateStore.init() needs to be handled for
      // HA and we need to give up master status if we got fenced
      LOG.error("Failed to init state store", e);
      throw e;
    }
    rmContext.setStateStore(rmStore);


    // The delegation token renewer is created only when security is enabled;
    // it is added to the service list further down.
    if (UserGroupInformation.isSecurityEnabled()) {
      delegationTokenRenewer = createDelegationTokenRenewer();
      rmContext.setDelegationTokenRenewer(delegationTokenRenewer);
    }


    // Register event handler for NodesListManager
    nodesListManager = new NodesListManager(rmContext);
    rmDispatcher.register(NodesListManagerEventType.class, nodesListManager);
    addService(nodesListManager);
    rmContext.setNodesListManager(nodesListManager);


    // Initialize the scheduler
    scheduler = createScheduler();
    rmContext.setScheduler(scheduler);


    // Scheduler events go through their own dispatcher, which is registered
    // as the handler for SchedulerEventType.
    schedulerDispatcher = createSchedulerEventDispatcher();
    addIfService(schedulerDispatcher);
    rmDispatcher.register(SchedulerEventType.class, schedulerDispatcher);


    // Register event handler for RmAppEvents
    rmDispatcher.register(RMAppEventType.class,
        new ApplicationEventDispatcher(rmContext));


    // Register event handler for RmAppAttemptEvents
    rmDispatcher.register(RMAppAttemptEventType.class,
        new ApplicationAttemptEventDispatcher(rmContext));


    // Register event handler for RmNodes
    rmDispatcher.register(
        RMNodeEventType.class, new NodeEventDispatcher(rmContext));
    // NodeManager liveliness monitoring service.
    nmLivelinessMonitor = createNMLivelinessMonitor();
    addService(nmLivelinessMonitor);
    // Resource tracker service; handles requests coming from NodeManagers.
    resourceTracker = createResourceTrackerService();
    addService(resourceTracker);
    rmContext.setResourceTrackerService(resourceTracker);
    // Metrics system initialization.
    DefaultMetricsSystem.initialize("ResourceManager");
    JvmMetrics.initSingleton("ResourceManager", null);


    // An IOException from the scheduler is rethrown as a RuntimeException.
    try {
      scheduler.reinitialize(conf, rmContext);
    } catch (IOException ioe) {
      throw new RuntimeException("Failed to initialize scheduler", ioe);
    }


    // creating monitors that handle preemption
    createPolicyMonitors();
    // Handles requests from ApplicationMasters, including registration
    // and heartbeats.
    masterService = createApplicationMasterService();
    addService(masterService) ;
    rmContext.setApplicationMasterService(masterService);
    // ACL management.
    applicationACLsManager = new ApplicationACLsManager(conf);


    queueACLsManager = createQueueACLsManager(scheduler, conf);


    rmAppManager = createRMAppManager();
    // Register event handler for RMAppManagerEvents
    rmDispatcher.register(RMAppManagerEventType.class, rmAppManager);
    // Client RPC requests.
    clientRM = createClientRMService();
    rmContext.setClientRMService(clientRM);
    addService(clientRM);
    // NOTE(review): this repeats the setClientRMService call made two
    // statements above with the same argument; looks redundant, confirm.
    rmContext.setClientRMService(clientRM);
    // Service that launches ApplicationMasters.
    applicationMasterLauncher = createAMLauncher();
    rmDispatcher.register(AMLauncherEventType.class,
        applicationMasterLauncher);


    addService(applicationMasterLauncher);
    if (UserGroupInformation.isSecurityEnabled()) {
      addService(delegationTokenRenewer);
      delegationTokenRenewer.setRMContext(rmContext);
    }


    // The instance is not kept; it is constructed only for whatever the
    // RMNMInfo constructor does. NOTE(review): presumably registers itself
    // somewhere; confirm.
    new RMNMInfo(rmContext, scheduler);
    // Enter the parent class's initialization function, which gets the
    // service list and loops over the services added above, initializing
    // each of them (serviceInit).
    super.serviceInit(conf);
  }