根据题意:最后就是求f(b) + f(k + b) + f(2 * k + b) + …+ f((n-1) * k + b)
显然f(b) = A^b
其中A =
1 1
1 0
所以sum(n - 1) = A^b(E + A^ k + A ^(2 * k) + … + A ^((n - 1) * k)
设D = A^k
sum(n-1) = A^b(E + D + D ^ 2 + … + D ^(n - 1))
括号里的部分就可以二分递归求出来了
而单个矩阵就可以用矩阵快速幂求出来

/*************************************************************************
    > File Name: hdu1588.cpp

    > Created Time: 2015年03月12日 星期四 18时25分07秒
 ************************************************************************/

#include <map>
#include <set>
#include <queue>
#include <stack>
#include <vector>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;

const double pi = acos(-1.0);
const int inf = 0x3f3f3f3f;
const double eps = 1e-15;
typedef long long LL;
typedef pair <int, int> PLL;

LL mod, k, b;

class MARTIX
{
    public:
        LL mat[3][3];
        MARTIX();
        MARTIX operator * (const MARTIX &b)const;
        MARTIX operator + (const MARTIX &b)const;
        MARTIX& operator = (const MARTIX &b);
}A, E, D;

MARTIX :: MARTIX()
{
    memset (mat, 0, sizeof(mat));
}

MARTIX MARTIX :: operator * (const MARTIX &b)const
{
    MARTIX ret;
    for (int i = 0; i < 2; ++i)
    {
        for (int j = 0; j < 2; ++j)
        {
            for (int k = 0; k < 2; ++k)
            {
                ret.mat[i][j] += this -> mat[i][k] * b.mat[k][j];
                ret.mat[i][j] %= mod;
            }
        }
    }
    return ret;
}

MARTIX MARTIX :: operator + (const MARTIX &b)const
{
    MARTIX ret;
    for (int i = 0; i < 2; ++i)
    {
        for (int j = 0; j < 2; ++j)
        {
            ret.mat[i][j] = this -> mat[i][j] + b.mat[i][j];
            ret.mat[i][j] %= mod;
        }
    }
    return ret;
}

MARTIX& MARTIX :: operator = (const MARTIX &b)
{
    for (int i = 0; i < 2; ++i)
    {
        for (int j = 0; j < 2; ++j)
        {
            this -> mat[i][j] = b.mat[i][j];
        }
    }
    return *this;
}

MARTIX fastpow(MARTIX ret, LL n)
{
    MARTIX ans;
    ans.mat[0][0] = ans.mat[1][1] = 1;
    while (n)
    {
        if (n & 1)
        {
            ans = ans * ret;
        }
        n >>= 1;
        ret = ret * ret;
    }
    return ans;
}

void Debug(MARTIX A)
{
    for (int i = 0; i < 2; ++i)
    {
        for (int j = 0; j < 2; ++j)
        {
            printf("%lld ", A.mat[i][j]);
        }
        printf("\n");
    }
}

MARTIX binseach(LL n)
{
    if (n == 1)
    {
        return D;
    }
    MARTIX nxt = binseach(n >> 1);
    MARTIX B = fastpow(D, n / 2);
    B = B + E;
    nxt = nxt * B;
    if (n & 1)
    {
        MARTIX C = fastpow(D, n);
        nxt = nxt + C;
    }
    return nxt;
}

int main()
{
    LL n;
    E.mat[0][0] = E.mat[1][1] = 1;
    A.mat[0][0] = A.mat[0][1] = A.mat[1][0] = 1;
//  Debug(A);
    while (~scanf("%lld%lld%lld%lld", &k, &b, &n, &mod))
    {
        if (n == 1)
        {
            MARTIX x = fastpow(A, b);
            printf("%lld\n", x.mat[0][1]);
            continue;
        }
        D = fastpow(A, k);
        MARTIX ans = binseach(n - 1);
        ans = ans + E;
        MARTIX base = fastpow(A, b);
        ans = base * ans;
//      Debug(ans);
        printf("%lld\n", ans.mat[0][1]);
    }
    return 0;
}