线程池设计（六千字长文详解）

精选原创

为世界献上祝福 2024-01-25 10:20:10 博主文章分类：c++ ©著作权

文章标签 #include 线程池任务队列 文章分类 C/C++ 后端开发 yyds干货盘点

©著作权归作者所有：来自51CTO博客作者为世界献上祝福的原创作品，请联系作者获取转载授权，否则将追究法律责任

线程池设计

什么是线程池

在处理外部的某种请求（例如：网络）的时候，一般我们都是来了任务，我们就会去创建一个线程，然后让线程去处理任务——这种方式是没有问题的！

但是来了任务才去创建线程！这样的后果就是会让我们的效率降低！因为创建线程也是有成本的也要花时间和花空间！

关于池化技术，如果使用过STL的容器，那么已经接触过了——就是空间配置器，就是一个内存池，内存池和线程池的思想是一样的

假设一共需要1000byte的内存，有两种方案：方案一是分10次申请、每次申请100byte；方案二是一次性申请1000byte。这两种方案——肯定是一次性申请1000byte的方案效率更高！虽然两种方案的结果是一样的！但是第二种方案比第一种方案少了9次的系统调用！而系统调用的成本是比调用普通函数的成本更高的！例如：要从用户态切换到内核态，修改CPU的执行权限，页表要从用户页表切换到内核页表等等，而且有可能内存非常紧张，导致每次申请都会触发操作系统底层的内存管理，例如合并碎片、空间调整等等，才能腾出100byte的空间给我们，每次都这么调用会让系统调用的成本很高！

但是如果是一次系统调用，剩下的由我们自己维护内存空间！这样子不用频繁的调用系统调用，从而大大的减少我们的时间！

==所以内存池的存在也是基于同样的理由！——就是尽量的减少系统调用！提高工作效率！（本质就是以空间换时间）内存池的大小要在预期之内，比预期大一些==

现实中也有这样的例子：例如一个家庭是一个农村家庭，通过种粮食从事生产活动，收获的时候肯定要将一批粮食存起来，作为自己口粮——不能说，自己家里人说饿了，才说去种小麦，这样晚了！

==这种将东西预先保留起来，预先多申请一部分的行为其实都是一种池化技术！就像是赚钱，我们可以赚多少花多少，但是为什么我们要存起来？就是为了满足未来可能的必要的开销！==

==这就是池化技术的本质——预先使用更大的成本，先申请一部分资源！当我们要用的时候能够直接拿出来==！

==我们未来的所有任务都是要用线程来处理的！在线程来取任务之前，我们也要将任务预先放在某个地方缓存起来！**然后我们也不要等到有任务的时候才创建线程！而是预先把线程创建好！**当有任务的时候，这些线程再去竞争地申请这些任务，拿走再去处理！！==

==这就是线程池的模型==

线程就是有任务就处理，没有任务就休眠！等有任务的时候就重新唤醒！唤醒线程成本相比创建线程成本更低！

任务可以从外部传过来！（例如：网络）然后由服务端将收到的任务push到任务队列里面！就不管了！然后让线程们去处理任务！

==任务队列+一堆的线程就是线程池！==——==其实这个模型就是典型的生产消费模型！==

线程池的实现

//LockGuard.hpp
#pragma once
#include<iostream>
#include<pthread.h>

namespace threadNs
{
    // Non-owning handle to a caller-provided pthread mutex.
    // The pointed-to mutex is neither initialised nor destroyed here.
    // A null pointer is tolerated: lock() and unlock() then do nothing.
    class Mutex
    {
    public:
        explicit Mutex(pthread_mutex_t *lock_p)
            : lock_p_(lock_p)
        {
        }

        // Blocks until the wrapped mutex is acquired (no-op for null).
        void lock()
        {
            if (lock_p_ != nullptr)
                pthread_mutex_lock(lock_p_);
        }

        // Releases the wrapped mutex (no-op for null).
        void unlock()
        {
            if (lock_p_ != nullptr)
                pthread_mutex_unlock(lock_p_);
        }

        ~Mutex()
        {
        }

    private:
        pthread_mutex_t *lock_p_; // borrowed, may be null
    };

    // Scope guard: locks the given mutex on construction and unlocks it on
    // destruction, so every exit path out of the scope releases the lock.
    class LockGuard
    {
    public:
        explicit LockGuard(pthread_mutex_t *mutex)
            : mutex_(mutex)
        {
            mutex_.lock();
        }

        ~LockGuard()
        {
            mutex_.unlock();
        }

        // A copy would unlock the same mutex a second time when it is
        // destroyed, so guards are deliberately non-copyable.
        LockGuard(const LockGuard &) = delete;
        LockGuard &operator=(const LockGuard &) = delete;

    private:
        Mutex mutex_;
    };
}

//Task.hpp
#pragma once
#include<iostream>
#include<functional>
#include<cstdio>
#include<ctime>
#include<string>
#include<map>
#include<fstream>
// A deferred binary calculation: two integer operands, an operator token and
// the callback that evaluates them.
//
// The type is default-constructible so that containers and worker loops can
// create a placeholder and assign a real task into it later.
class CalTask
{
    using func_t = std::function<int(int,int,const std::string&)>;
public:
    // Placeholder task: operands are 0, the operator is empty and there is
    // no callback. Invoking such a task throws std::bad_function_call.
    CalTask() = default;

    // x, y  - operands
    // op    - operator token, forwarded verbatim to the callback
    // func  - evaluator called as func(x, y, op)
    CalTask(int x, int y, const std::string& op, func_t func)
        : x_(x), y_(y), op_(op), callback_(func)
    {}

    // Runs the callback and returns "<x> <op> <y> = <result>".
    std::string operator()() const
    {
        const int result = callback_(x_, y_, op_);
        return expression() + std::to_string(result);
    }

    // Returns the not-yet-evaluated form "<x> <op> <y> = ?".
    std::string toTaskString() const
    {
        return expression() + "?";
    }

private:
    // Common prefix "<x> <op> <y> = ". It is built with std::string so an
    // arbitrarily long operator token is never truncated.
    std::string expression() const
    {
        return std::to_string(x_) + " " + op_ + " " + std::to_string(y_) + " = ";
    }

    int x_ = 0;
    int y_ = 0;
    std::string op_;
    func_t callback_;
};


// The operator characters mymath understands.
const std::string oper = "+-*/%";

// Evaluates `x op y` for a single-character operator taken from `oper`.
//
// Returns the arithmetic result. On error it prints a diagnostic to
// std::cout and returns -1; the error cases are:
//   - op is not exactly one of + - * / %  ("op error!")
//   - division by zero                    ("div zero error!")
//   - modulo by zero                      ("mod zero error!")
// -1 is therefore ambiguous with a genuine result of -1.
//
// Declared inline because the definition lives in a header; without it,
// including the header from two translation units fails at link time.
inline int mymath(int x,int y,const std::string& op)
{
    // Require exactly one character. A plain substring search in `oper`
    // would also accept "" and sequences such as "+-", which are not
    // operators.
    if(op.size() != 1 || oper.find(op[0]) == std::string::npos)
    {
        std::cout << "op error!" << std::endl;
        return -1;
    }

    switch(op[0])
    {
    case '+':
        return x + y;
    case '-':
        return x - y;
    case '*':
        return x * y;
    case '/':
        if(y == 0)
        {
            std::cout << "div zero error!" << std::endl;
            return -1;
        }
        return x / y;
    case '%':
        if(y == 0)
        {
            std::cout << "mod zero error!" << std::endl;
            return -1;
        }
        return x % y;
    default:
        // Only reachable if `oper` gains a character that has no case above.
        std::cout << "op error!" << std::endl;
        return -1;
    }
}

//Thread.hpp
#pragma once
#include <iostream>
#include <pthread.h>
#include <functional>
#include <cstring>
#include <cassert>
#include <string>

namespace threadNs
{
    // Signature of a thread entry function (same shape as a pthread routine).
    using func_t = std::function<void *(void *)>;
    // Retained for code that already refers to threadNs::num.
    const int num = 1024;

    // Minimal wrapper around one pthread.
    //
    // The object must stay alive and at the same address from start() until
    // the thread function returns, because the new thread receives `this`.
    class Thread
    {
    private:
        // pthread entry point; receives the Thread object itself.
        static void *start_routine(void *_this)
        {
            Thread *self = static_cast<Thread *>(_this);
            return self->callback();
        }

        // Invokes the user function with the user argument.
        void *callback()
        {
            return func_(args_);
        }

        // Hands out 1, 2, 3, ... in construction order. A function-local
        // static in an inline function has a single instance program-wide,
        // so this header can be included from several translation units.
        // The increment is not synchronised: construct Thread objects from
        // one thread at a time.
        static int nextId()
        {
            static int next = 1;
            return next++;
        }

    public:
        // Names the thread "thread-<n>"; no OS thread is created yet.
        Thread()
            : name_("thread-" + std::to_string(nextId()))
        {
        }

        // Waits for the thread to finish. Does nothing if the thread was
        // never started or has already been joined.
        void join()
        {
            if (!started_)
                return;
            int n = pthread_join(tid_, nullptr);
            assert(n == 0);
            (void)n;
            started_ = false;
        }

        // Creates the OS thread, which runs func(args).
        void start(func_t func, void *args)
        {
            func_ = func;
            args_ = args;
            // Pass `this` so the static entry point can reach func_/args_.
            int n = pthread_create(&tid_, nullptr, start_routine, this);
            assert(n == 0);
            // Only a successfully created thread may be joined later.
            started_ = (n == 0);
        }

        std::string Threadname()
        {
            return name_;
        }

        ~Thread()
        {
        }

    private:
        std::string name_;          // human-readable name
        func_t func_;               // user entry function
        pthread_t tid_ = pthread_t(); // valid only while started_ is true
        void *args_ = nullptr;      // argument handed to func_
        bool started_ = false;      // true between a successful start() and join()
    };
}

==线程池的模拟实现！==

#pragma once
#include "Thread.hpp"
#include "LockGuard.hpp"
#include <vector>
#include<queue>
#include <pthread.h>
#include<unistd.h>
using namespace threadNs;
const int gnum = 3;

// Declared ahead of its definition so ThreadData can hold a pointer to it.
template <class T>
class ThreadPool;

// Argument bundle handed to a worker thread: the pool it serves and the
// worker's display name.
template <class T>
class ThreadData
{
public:
    ThreadData(ThreadPool<T>* tp, const std::string& name)
        : threadpool_(tp), name_(name)
    {
    }

    ThreadPool<T>* threadpool_; // pool this worker belongs to (not owned)
    std::string name_;          // worker name
};

// Fixed-size worker pool: a set of threads blocks on a condition variable
// and consumes tasks from a mutex-protected FIFO queue.
//
// Requirements on T as used below: default-constructible, copyable and
// assignable, callable with no arguments, and providing toTaskString();
// both results are written to std::cout.
//
// Lifetime: workers are created by run(). The destructor asks them to
// stop, lets them finish whatever is still queued, joins them, and only
// then destroys the mutex and condition variable.
template <class T>
class ThreadPool
{
private:
    // Worker loop. Static because a pthread entry point cannot take the
    // implicit `this`; the pool is reached through the ThreadData argument.
    static void* handlerTask(void* args)
    {
        ThreadData<T>* data = static_cast<ThreadData<T>*>(args);
        ThreadPool<T>* tp = data->threadpool_;
        while(true)
        {
            T t;
            {
                // The queue is shared by all workers and producers, so it
                // is only touched while holding the pool mutex.
                LockGuard lock(&tp->mutex_);
                // Loop rather than `if`: re-check the condition after
                // every wake-up.
                while (tp->IsQueueEmpty() && !tp->stop_)
                {
                    tp->threadwait();
                }
                if (tp->IsQueueEmpty())
                {
                    // Stop requested and nothing left to do.
                    break;
                }
                t = tp->pop();
            }
            // The task now lives on this thread's stack, so it runs
            // outside the lock and workers can execute in parallel.
            std::cout << data->name_ << " 获取了任务 " << t.toTaskString()<<" 并处理完成！结果是: "<< t()<<std::endl;
        }
        delete data; // allocated in run(), owned by this worker
        return nullptr;
    }

public:
    // Helpers used by the worker loop. Callers must hold mutex_.
    bool IsQueueEmpty(){return task_queue_.empty();}
    void threadwait(){pthread_cond_wait(&cond_, &mutex_);}
    T pop()
    {
        T t = task_queue_.front();
        task_queue_.pop();
        return t;
    }

public:
    // Creates `num` Thread objects; the OS threads start in run().
    ThreadPool(const int &num = gnum)
        :num_(num)
    {
        pthread_mutex_init(&mutex_, nullptr);
        pthread_cond_init(&cond_, nullptr);
        for(int i = 0; i < num_; ++i)
        {
            threads_.push_back(new Thread());
        }
    }

    // The pool owns raw Thread pointers and pthread primitives; a copy
    // would delete and destroy them twice.
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;

    // Starts the workers. Further calls after the first do nothing.
    void run()
    {
        {
            LockGuard lock(&mutex_);
            if (running_)
            {
                return;
            }
            running_ = true;
        }
        for(const auto& t : threads_)
        {
            ThreadData<T>* data = new ThreadData<T>(this,t->Threadname());
            t->start(handlerTask,data);
            std::cout << "thread " << t->Threadname() << " is running" << std::endl;
        }
    }

    ~ThreadPool()
    {
        {
            LockGuard lock(&mutex_);
            stop_ = true;
            // Wake every worker so each one can observe stop_.
            pthread_cond_broadcast(&cond_);
        }
        for(const auto& e : threads_)
        {
            // Join only threads that run() actually started.
            if (running_)
            {
                e->join();
            }
            delete e;
        }
        // No worker can be waiting any more, so the primitives may go.
        pthread_cond_destroy(&cond_);
        pthread_mutex_destroy(&mutex_);
    }

    // Queues a copy of `in` and wakes one waiting worker.
    void push(const T&in)
    {
        LockGuard lock(&mutex_);
        task_queue_.push(in);
        pthread_cond_signal(&cond_);
    }

private:
    int num_;                       // number of worker threads
    std::vector<Thread*> threads_;  // owned worker objects
    std::queue<T> task_queue_;      // pending tasks, guarded by mutex_
    pthread_mutex_t mutex_;         // protects task_queue_, stop_ and running_
    pthread_cond_t cond_;           // workers wait here while the queue is empty
    bool stop_ = false;             // set by the destructor to end the workers
    bool running_ = false;          // true once run() has started the workers
};

#include "ThreadPool.hpp"
#include "Task.hpp"
#include<memory>

// Interactive driver: reads two integers and an operator from standard
// input, asks for confirmation, and submits the calculation to the pool.
// Malformed input is discarded and the prompt repeats; the loop ends when
// standard input is exhausted or unusable.
int main()
{
    std::unique_ptr<ThreadPool<CalTask>> tp(new ThreadPool<CalTask>());
    tp->run();
    while(true)
    {
        int x = 0;
        int y = 0;
        std::string op;
        char confirm = 'n';

        // In a real service the operands would come from a database or
        // the network instead of the keyboard.
        std::cout << "请输入数据1# " << std::endl;
        bool ok = static_cast<bool>(std::cin >> x);
        if(ok)
        {
            std::cout << "请输入数据2# " << std::endl;
            ok = static_cast<bool>(std::cin >> y);
        }
        if(ok)
        {
            std::cout << "请输入操作符# " << std::endl;
            ok = static_cast<bool>(std::cin >> op);
        }
        if(ok)
        {
            CalTask t1(x,y,op,mymath);
            std::cout << "你刚刚录入了一个任务！" << t1.toTaskString()<< "确认提交吗？[y/n]" <<std::endl;
            ok = static_cast<bool>(std::cin >> confirm);
            if(ok)
            {
                if(confirm == 'y')
                    tp->push(t1);
                else
                    std::cout << "任务已取消！" << std::endl;
            }
        }

        if(!ok)
        {
            // End of input or a broken stream cannot be recovered from.
            if(std::cin.eof() || std::cin.bad())
                break;
            // Otherwise the token had the wrong format: reset the stream
            // and drop the rest of the line before prompting again.
            std::cin.clear();
            std::string discarded;
            std::getline(std::cin, discarded);
            std::cout << "输入无效，请重新输入！" << std::endl;
            continue;
        }

        // Short pause before the next round of prompts.
        sleep(1);
    }
    return 0;
}

==线程池还有一个优势——就是可以控制线程总数！如果我们控制了线程池总数只有4个线程！未来即使有再多的任务到来，永远都是4个线程！——这样子好么？是好的！因为在网络的情况下，如果有新的客户端来了，那么就必然有很多连接过来！如果线程过多，本质是在冲击我们的服务器！如果线程有一个上限，那么就可以让过多的客户端去等待！从而维护服务器的稳定！==

线程池总结

线程池：一种线程使用模式。**线程过多会带来调度开销，进而影响缓存局部性和整体性能。而线程池维护着多个线程，等待着监督管理者分配可并发执行的任务。**==这避免了在处理短时间任务时创建与销毁线程的代价。线程池不仅能够保证内核的充分利用，还能防止过分调度。==可用线程数量应该取决于可用的并发处理器、处理器内核、内存、网络sockets等的数量。

线程池的应用场景：

需要大量的线程来完成任务，且完成任务的时间比较短。 WEB服务器完成网页请求这样的任务，使用线程池技术是非常合适的。因为单个任务小，而任务数量巨大，你可以想象一个热门网站的点击次数。但对于长时间的任务，比如一个Telnet连接请求，线程池的优点就不明显了。因为Telnet会话时间比线程的创建时间大多了。

对性能要求苛刻的应用，比如要求服务器迅速响应客户请求。

接受突发性的大量请求，但不至于使服务器因此产生大量线程的应用。突发性大量客户请求，在没有线程池的情况下，将产生大量线程，虽然理论上大部分操作系统线程数目最大值不是问题，但短时间内产生大量线程可能使内存到达极限，出现错误。

线程池的种类：

线程池示例：

创建固定数量线程池，循环从任务队列中获取任务对象，

获取到任务对象后，执行任务对象中的任务接口

单例模式版线程池

#include "Thread.hpp"
#include "LockGuard.hpp"
#include <vector>
#include<queue>
#include <pthread.h>
#include<unistd.h>
#include<mutex>
using namespace threadNs;
const int gnum = 3;

// Forward declaration: ThreadData only stores a pointer to the pool.
template <class T>
class ThreadPool;

// What each worker thread receives at start-up: a back-pointer to its pool
// and the name it should report under.
template <class T>
class ThreadData
{
public:
    ThreadData(ThreadPool<T>* tp, const std::string& name)
        : threadpool_(tp),
          name_(name)
    {
    }

public:
    ThreadPool<T>* threadpool_; // owning pool, not owned by this object
    std::string name_;          // worker name
};

// Singleton variant of the worker pool: one lazily created ThreadPool<T>
// per task type T, obtained through getInstance().
//
// Requirements on T as used below: default-constructible, copyable and
// assignable, callable with no arguments, and providing toTaskString();
// both results are written to std::cout.
template <class T>
class ThreadPool
{
private:
    // Worker loop. Static because a pthread entry point cannot take the
    // implicit `this`; the pool is reached through the ThreadData argument.
    static void* handlerTask(void* args)
    {
        ThreadData<T>* data = static_cast<ThreadData<T>*>(args);
        ThreadPool<T>* tp = data->threadpool_;
        while(true)
        {
            T t;
            {
                // The queue is shared, so it is only touched under mutex_.
                LockGuard lock(&tp->mutex_);
                // Loop rather than `if`: re-check after every wake-up.
                while (tp->IsQueueEmpty() && !tp->stop_)
                {
                    tp->threadwait();
                }
                if (tp->IsQueueEmpty())
                {
                    // Stop requested and nothing left to do.
                    break;
                }
                t = tp->pop();
            }
            // Run the task outside the lock so workers execute in parallel.
            std::cout << data->name_ << " 获取了任务 " << t.toTaskString()<<" 并处理完成！结果是: "<< t()<<std::endl;
        }
        delete data; // allocated in run(), owned by this worker
        return nullptr;
    }

    // Private: instances are created only by getInstance().
    ThreadPool(const int &num = gnum)
        :num_(num)
    {
        pthread_mutex_init(&mutex_, nullptr);
        pthread_cond_init(&cond_, nullptr);
        for(int i = 0; i < num_; ++i)
        {
            threads_.push_back(new Thread());
        }
    }
    ThreadPool& operator=(const ThreadPool&) = delete;
    ThreadPool(const ThreadPool&) = delete;

public:
    // Helpers used by the worker loop. Callers must hold mutex_.
    bool IsQueueEmpty(){return task_queue_.empty();}
    void threadwait(){pthread_cond_wait(&cond_, &mutex_);}
    T pop()
    {
        T t = task_queue_.front();
        task_queue_.pop();
        return t;
    }

public:
    // Starts the workers. Because every user shares the one instance,
    // run() may be called more than once; only the first call has an effect.
    void run()
    {
        {
            LockGuard lock(&mutex_);
            if (running_)
            {
                return;
            }
            running_ = true;
        }
        for(const auto& t : threads_)
        {
            ThreadData<T>* data = new ThreadData<T>(this,t->Threadname());
            t->start(handlerTask,data);
            std::cout << "thread " << t->Threadname() << " is running" << std::endl;
        }
    }

    // Returns the single pool for T, creating it on first use.
    // Static so that it can be called before any object exists.
    // std::call_once runs the initialiser exactly once even when several
    // threads arrive together, and publishes pool_ safely to all of them;
    // reading pool_ before taking a lock, as a hand-written double check
    // does, is a data race on a plain pointer.
    static ThreadPool<T> *getInstance()
    {
        std::call_once(once_, [] { pool_ = new ThreadPool<T>(); });
        return pool_;
    }

    // Asks the workers to stop, lets them drain the queue, joins them and
    // then releases the pthread primitives. Nothing in this class deletes
    // the singleton; this only runs if a caller deletes it explicitly.
    ~ThreadPool()
    {
        {
            LockGuard lock(&mutex_);
            stop_ = true;
            // Wake every worker so each one can observe stop_.
            pthread_cond_broadcast(&cond_);
        }
        for(const auto& e : threads_)
        {
            // Join only threads that run() actually started.
            if (running_)
            {
                e->join();
            }
            delete e;
        }
        pthread_cond_destroy(&cond_);
        pthread_mutex_destroy(&mutex_);
    }

    // Queues a copy of `in` and wakes one waiting worker.
    void push(const T&in)
    {
        LockGuard lock(&mutex_);
        task_queue_.push(in);
        pthread_cond_signal(&cond_);
    }

private:
    int num_;                       // number of worker threads
    std::vector<Thread*> threads_;  // owned worker objects
    std::queue<T> task_queue_;      // pending tasks, guarded by mutex_
    pthread_mutex_t mutex_;         // protects task_queue_, stop_ and running_
    pthread_cond_t cond_;           // workers wait here while the queue is empty
    bool stop_ = false;             // set by the destructor to end the workers
    bool running_ = false;          // true once run() has started the workers

    static ThreadPool<T>* pool_;    // the lazily created instance
    static std::once_flag once_;    // guards the one-time creation of pool_
};

// Definitions of the class template's static data members.
template<class T>
ThreadPool<T>* ThreadPool<T>::pool_ = nullptr;

template<class T>
std::once_flag ThreadPool<T>::once_;