一个学python来对栅格数据处理的小白记录一下。踩了很多坑得出的重要经验:对栅格逐像元地分析,就是把栅格转换成数组(或三维或二维),再利用循环调用对应的数学方法就好,最后再转换回栅格数组形式,再输出成tif就好。

———————————————————分割线————————————————————

背景:做长时间序列的变化分析——1990年-2020年,但没有同一个传感器的数据,需要进行逐像元的校正。

举例:已有数据:GLOPEM传感器有1990年-2011年数据,MODIS传感器有2000年-2020年数据。

         目标:选取有重合数据的2000-2011年份。重点:每一个像元以2000-2011的GLOPEM数据为x,以2000-2011的MODIS数据为y,做线性回归。即每个像元做线性回归时,x和y分别有12个数据(2011+1-2000=12)。

思路:如图。以2*2的栅格为例。读取栅格数据为数组,转换为线性回归对应的数组,调用linear线性回归的包,再转换成能输出成栅格数据的数组,输出栅格。

长时间栅格图像序列多元回归残差分析python 栅格数据回归分析_栅格

 

长时间栅格图像序列多元回归残差分析python 栅格数据回归分析_栅格_02

长时间栅格图像序列多元回归残差分析python 栅格数据回归分析_栅格_03

 

 附我最后成功的代码:

引用的时候改一改读取栅格数据时的各种参数就好。

# -*- coding:utf-8 -*-
import glob, os, sys
import numpy as np
import numpy.ma as ma
import time
import datetime
from osgeo import gdal                     #导入osgeo包的gdal模块,GDAL用于读栅格数据,函数返回Dataset对象
from scipy import stats, linalg
from scipy.stats import mstats
import pandas as pd
import matplotlib.pyplot as plt
print("ok")
##############################################################################

def write_raster(dataset_name, output_array, GeoT, proJ, xsize, ysize, fillvalue=-9999.0, driverName='GTiff'):
    # print "creating", output_name
    dr = gdal.GetDriverByName(driverName)
    dr.Register()
    # register all of the GDAL drivers
    # gdal.AllRegister()
    # Create a new dataset with this driver
    outDataset = dr.Create(dataset_name, xsize, ysize, 1, gdal.GDT_Float32,
                           options=['COMPRESS=LZW'])  # gdal.GDT_UInt16\GDT_Float32, gdal.GDT_Float32

    if outDataset is None:
        print("fail to create new raster")
        sys.exit(1)
    # Write metadata
    outDataset.SetGeoTransform(GeoT)  # GeoT: inDs.GetGeoTransform()
    outDataset.SetProjection(proJ)  # proJ: inDs.GetProjection() # proJ: inDs.GetProjection()

    #  Write raster data sets
    outDataset.GetRasterBand(1).WriteArray(output_array)
    outDataset.GetRasterBand(1).SetNoDataValue(fillvalue)
    # Close raster file
    outDataset = None


def export_array_trend(outFol, inTuple, geoTran, geoProj, cols, rows, variable='lin', factor='p', fillvalue= -9999.0, driverName='GTiff'):
    ''' lineareg, cvmoving'''
    outNames = (
        ".".join([factor, variable, "slope", "tif"]),\
        ".".join([factor, variable, "intcp", "tif"]),\
        ".".join([factor, variable, "rval", "tif"]),\
        ".".join([factor, variable, "pval", "tif"]),\
        ".".join([factor, variable, "stderr", "tif"])\
        ) # "An_mkP"  + "_" + factor + ".tif"
    for i, fname in enumerate(outNames):                     #可遍历的对象(如列表、字符串),enumerate将其组成一个索引序列
        outFdirs = os.path.join(outFol, fname)
        write_raster(outFdirs, inTuple[i],  geoTran, geoProj, cols, rows, fillvalue= fillvalue, driverName= driverName)


def Calculate_trend(inFol, outFol, factor='p', inFormat=".tif"):
    factor_finListm = []
    factor_finListg = []
    str_1 = 'm'
    str_2 = 'g'

    for files in os.listdir(inFol):
        for year in range(2000, 2012):
            if (factor in files) and (inFormat == files[-4:]) and (str_2 in files) and (str(year) in files):
                fileIn = os.path.join(inFol, files)
                dataset = gdal.Open(fileIn, gdal.GA_ReadOnly)
                if dataset is None:
                    print('Could not open raster file'), fileIn
                    sys.exit(1)
                factor_arrayg = dataset.ReadAsArray().astype(np.float32)
                print("factor_arrayg是1:", factor_arrayg)
                # factor_array = factor_array.astype('float')
                factor_finListg.append(factor_arrayg)
                print("factor_finListg是1:", factor_finListg)
        factor_arrayg = np.array(factor_finListg)
        print("factor_arrayg是2:", factor_arrayg)

    for files in os.listdir(inFol):
        for year in range(2000, 2012):
            if (factor in files) and (inFormat == files[-4:]) and (str_1 in files) and (str(year) in files):
                fileIn = os.path.join(inFol, files)
                dataset = gdal.Open(fileIn, gdal.GA_ReadOnly)
                if dataset is None:
                    print('Could not open raster file'), fileIn
                    sys.exit(1)
                factor_arraym = dataset.ReadAsArray().astype(np.float32)
                #print("factor_arraym是1:", factor_arraym)
                # factor_array = factor_array.astype('float')
                factor_finListm.append(factor_arraym)
                #print("factor_finListm是1:", factor_finListm)
                factor_arraym = np.array(factor_finListm)
                #print("factor_arraym是2:", factor_arraym)
    g = factor_arrayg
    print("g是:", g)
    m = factor_arraym
    print("m是:", m)

    a = g.shape[0]
    print("a(0维-图像数):", a)
    b = g.shape[1]
    print("b(1维-行数):", b)
    c = g.shape[2]
    print("c(2维-列数):", c)
    n = b * c
    print("n(一个图像的像元数):", n)

    # 定义数组,为了把栅格数组转化为能做逐像元线性回归
    x = np.array(list([0.0 for i in range(a)] for i in range(n)))
    print("x初始:", x)
    y = np.array(list([0.0 for i in range(a)] for i in range(n)))
    print("y初始:", y)
    lin = np.array(list([[[0.0 for i in range(c)] for i in range(b)] for i in range(5)]))
    print("lin初始:", lin)

    # 把三维b行c列的数组,把对象中对应的第一个元素提取出来,一一对应转换为二维数组
    l = 0
    for k in range(b):
        for j in range(c):
            # print("l:", l)
            for i in range(a):
                # x.append(g[i, j, k])
                #print("j:", j)
                x[l, i] = g[i, k, j]
                #print("gx:", x)
                y[l, i] = m[i, k, j]
                #print("my:", y)
            l = l + 1

        ##对转换出的二维数组线性回归+把回归结果slope等,放在栅格三维数组里
        j = 0
        k = 0
    for i in range(x.shape[0]):
        #res = stats.linregress(x[i], y[i])
        slope, intercept, r_value, p_value, std_err = stats.linregress(x[i], y[i])
        lin[0, j, k] = slope
        lin[1, j, k] = intercept
        lin[2, j, k] = r_value
        lin[3, j, k] = p_value
        lin[4, j, k] = std_err
        k = k + 1
        if k > c - 1:
            j = j + 1
            k = 0
    print("lin:", lin)

    kwargs = {"fillvalue": -9999.0, "plot": False}
    export_array_trend(outFol, lin, geoTran, geoProj, cols, rows, variable='lin', factor=factor, fillvalue=-9999.0, driverName='GTiff')



#####main
start_time = time.time()

inFol = r"D:\py\jz\jz00" #读入的文件夹路径
outFol = r"D:\py\jz\jz00\r00"    #输出的新文件的路径
if not os.path.exists(outFol):                             #os.path.exists()函数用来检验给出的路径是否真地存在 返回bool
    os.makedirs(outFol)                                    #makedirs(path):递归式的创建文件夹,注:创建已存在的文件夹将异常

### get raster tiff infomation(GeoTransform, Projection)
global geoTran, geoProj, cols, rows, nodatav

for allRasters in os.listdir(inFol):                      #listdir(path):列举目录下的所有文件

    if allRasters.endswith(".tif"):
        firstRasPath = os.path.join(inFol, allRasters)    #加入目录下的所有栅格文件
        print("栅格数据:", firstRasPath)
        break
if firstRasPath is None:
    print ('Cannot open ', inFol)
    sys.exit(1)

firstdataset = gdal.Open(firstRasPath, gdal.GA_ReadOnly)
cols= firstdataset.RasterXSize
rows= firstdataset.RasterYSize
bandnum =  firstdataset.RasterCount
driver = firstdataset.GetDriver()
geoTran = firstdataset.GetGeoTransform()
geoProj= firstdataset.GetProjection()
nodatav = firstdataset.GetRasterBand(1).GetNoDataValue()

Calculate_trend(inFol, outFol, factor='p', inFormat = ".tif")

#linear_reg = Trend_climate(varname = "temp", fillvalue= -9999.0)
#slope, intercept, r_value, p_value, std_err = Trend_climate(varname = "temp", fillvalue= -9999.0)

print ('elapsed time: %.2f s' %(time.time() - start_time))