一个学python来对栅格数据处理的小白记录一下。踩了很多坑得出的重要经验:对栅格逐像元地分析,就是把栅格转换成数组(或三维或二维),再利用循环调用对应的数学方法就好,最后再转换回栅格数组形式,再输出成tif就好。
———————————————————分割线————————————————————
背景:做长时间序列的变化分析——1990年-2020年,但没有同一个传感器的数据,需要进行逐像元的校正。
举例:已有数据:GLOPEM传感器有1990年-2011年数据,MODIS传感器有2000年-2020年数据。
目标:选取有重合数据的2000-2011年份。重点:每一个像元以2000-2011的GLOPEM数据为x,以2000-2011的MODIS数据为y,做线性回归。即每个像元做线性回归时,x和y分别有12个数据(2011+1-2000=12)。
思路:如图。以2*2的栅格为例。读取栅格数据为数组,转换为线性回归对应的数组,调用linear线性回归的包,再转换成能输出成栅格数据的数组,输出栅格。
附我最后成功的代码:
引用的时候改一改读取栅格数据时的各种参数就好。
# -*- coding:utf-8 -*-
import glob, os, sys
import numpy as np
import numpy.ma as ma
import time
import datetime
from osgeo import gdal #导入osgeo包的gdal模块,GDAL用于读栅格数据,函数返回Dataset对象
from scipy import stats, linalg
from scipy.stats import mstats
import pandas as pd
import matplotlib.pyplot as plt
print("ok")
##############################################################################
def write_raster(dataset_name, output_array, GeoT, proJ, xsize, ysize, fillvalue=-9999.0, driverName='GTiff'):
# print "creating", output_name
dr = gdal.GetDriverByName(driverName)
dr.Register()
# register all of the GDAL drivers
# gdal.AllRegister()
# Create a new dataset with this driver
outDataset = dr.Create(dataset_name, xsize, ysize, 1, gdal.GDT_Float32,
options=['COMPRESS=LZW']) # gdal.GDT_UInt16\GDT_Float32, gdal.GDT_Float32
if outDataset is None:
print("fail to create new raster")
sys.exit(1)
# Write metadata
outDataset.SetGeoTransform(GeoT) # GeoT: inDs.GetGeoTransform()
outDataset.SetProjection(proJ) # proJ: inDs.GetProjection() # proJ: inDs.GetProjection()
# Write raster data sets
outDataset.GetRasterBand(1).WriteArray(output_array)
outDataset.GetRasterBand(1).SetNoDataValue(fillvalue)
# Close raster file
outDataset = None
def export_array_trend(outFol, inTuple, geoTran, geoProj, cols, rows, variable='lin', factor='p', fillvalue= -9999.0, driverName='GTiff'):
''' lineareg, cvmoving'''
outNames = (
".".join([factor, variable, "slope", "tif"]),\
".".join([factor, variable, "intcp", "tif"]),\
".".join([factor, variable, "rval", "tif"]),\
".".join([factor, variable, "pval", "tif"]),\
".".join([factor, variable, "stderr", "tif"])\
) # "An_mkP" + "_" + factor + ".tif"
for i, fname in enumerate(outNames): #可遍历的对象(如列表、字符串),enumerate将其组成一个索引序列
outFdirs = os.path.join(outFol, fname)
write_raster(outFdirs, inTuple[i], geoTran, geoProj, cols, rows, fillvalue= fillvalue, driverName= driverName)
def Calculate_trend(inFol, outFol, factor='p', inFormat=".tif"):
factor_finListm = []
factor_finListg = []
str_1 = 'm'
str_2 = 'g'
for files in os.listdir(inFol):
for year in range(2000, 2012):
if (factor in files) and (inFormat == files[-4:]) and (str_2 in files) and (str(year) in files):
fileIn = os.path.join(inFol, files)
dataset = gdal.Open(fileIn, gdal.GA_ReadOnly)
if dataset is None:
print('Could not open raster file'), fileIn
sys.exit(1)
factor_arrayg = dataset.ReadAsArray().astype(np.float32)
print("factor_arrayg是1:", factor_arrayg)
# factor_array = factor_array.astype('float')
factor_finListg.append(factor_arrayg)
print("factor_finListg是1:", factor_finListg)
factor_arrayg = np.array(factor_finListg)
print("factor_arrayg是2:", factor_arrayg)
for files in os.listdir(inFol):
for year in range(2000, 2012):
if (factor in files) and (inFormat == files[-4:]) and (str_1 in files) and (str(year) in files):
fileIn = os.path.join(inFol, files)
dataset = gdal.Open(fileIn, gdal.GA_ReadOnly)
if dataset is None:
print('Could not open raster file'), fileIn
sys.exit(1)
factor_arraym = dataset.ReadAsArray().astype(np.float32)
#print("factor_arraym是1:", factor_arraym)
# factor_array = factor_array.astype('float')
factor_finListm.append(factor_arraym)
#print("factor_finListm是1:", factor_finListm)
factor_arraym = np.array(factor_finListm)
#print("factor_arraym是2:", factor_arraym)
g = factor_arrayg
print("g是:", g)
m = factor_arraym
print("m是:", m)
a = g.shape[0]
print("a(0维-图像数):", a)
b = g.shape[1]
print("b(1维-行数):", b)
c = g.shape[2]
print("c(2维-列数):", c)
n = b * c
print("n(一个图像的像元数):", n)
# 定义数组,为了把栅格数组转化为能做逐像元线性回归
x = np.array(list([0.0 for i in range(a)] for i in range(n)))
print("x初始:", x)
y = np.array(list([0.0 for i in range(a)] for i in range(n)))
print("y初始:", y)
lin = np.array(list([[[0.0 for i in range(c)] for i in range(b)] for i in range(5)]))
print("lin初始:", lin)
# 把三维b行c列的数组,把对象中对应的第一个元素提取出来,一一对应转换为二维数组
l = 0
for k in range(b):
for j in range(c):
# print("l:", l)
for i in range(a):
# x.append(g[i, j, k])
#print("j:", j)
x[l, i] = g[i, k, j]
#print("gx:", x)
y[l, i] = m[i, k, j]
#print("my:", y)
l = l + 1
##对转换出的二维数组线性回归+把回归结果slope等,放在栅格三维数组里
j = 0
k = 0
for i in range(x.shape[0]):
#res = stats.linregress(x[i], y[i])
slope, intercept, r_value, p_value, std_err = stats.linregress(x[i], y[i])
lin[0, j, k] = slope
lin[1, j, k] = intercept
lin[2, j, k] = r_value
lin[3, j, k] = p_value
lin[4, j, k] = std_err
k = k + 1
if k > c - 1:
j = j + 1
k = 0
print("lin:", lin)
kwargs = {"fillvalue": -9999.0, "plot": False}
export_array_trend(outFol, lin, geoTran, geoProj, cols, rows, variable='lin', factor=factor, fillvalue=-9999.0, driverName='GTiff')
#####main
start_time = time.time()
inFol = r"D:\py\jz\jz00" #读入的文件夹路径
outFol = r"D:\py\jz\jz00\r00" #输出的新文件的路径
if not os.path.exists(outFol): #os.path.exists()函数用来检验给出的路径是否真地存在 返回bool
os.makedirs(outFol) #makedirs(path):递归式的创建文件夹,注:创建已存在的文件夹将异常
### get raster tiff infomation(GeoTransform, Projection)
global geoTran, geoProj, cols, rows, nodatav
for allRasters in os.listdir(inFol): #listdir(path):列举目录下的所有文件
if allRasters.endswith(".tif"):
firstRasPath = os.path.join(inFol, allRasters) #加入目录下的所有栅格文件
print("栅格数据:", firstRasPath)
break
if firstRasPath is None:
print ('Cannot open ', inFol)
sys.exit(1)
firstdataset = gdal.Open(firstRasPath, gdal.GA_ReadOnly)
cols= firstdataset.RasterXSize
rows= firstdataset.RasterYSize
bandnum = firstdataset.RasterCount
driver = firstdataset.GetDriver()
geoTran = firstdataset.GetGeoTransform()
geoProj= firstdataset.GetProjection()
nodatav = firstdataset.GetRasterBand(1).GetNoDataValue()
Calculate_trend(inFol, outFol, factor='p', inFormat = ".tif")
#linear_reg = Trend_climate(varname = "temp", fillvalue= -9999.0)
#slope, intercept, r_value, p_value, std_err = Trend_climate(varname = "temp", fillvalue= -9999.0)
print ('elapsed time: %.2f s' %(time.time() - start_time))