菜鸟这两天在数据查询的工作中遇见了一个有意思的问题,在这里抛砖引玉,将自己的思考与解决方法分享给大家,如果你有更好的方法,请务必指教一下,谢谢。

这里就不拿工作的数据来解析,自己新建了数据拿来解析,数据的准确性菜鸟并没有逐一确认。

问题:获取每个班级最新的某一个教室的预定信息


表结构:


CREATE TABLE Room_Reservation(
     RoomNumber NVARCHAR(100), --教室编号
	 ClassNumber NVARCHAR(100),--班级编号
	 Section INT,             --第几节课
	 OrderDate NVARCHAR(50),  --预约日期
	 Reason    NVARCHAR(300) --理由
);


数据:

INSERT INTO Room_Reservation(RoomNumber,ClassNumber,Section,OrderDate,Reason) VALUES
(N'A栋1楼C2','1998RC001',1,'20001206','Active'),
(N'A栋1楼C2','1998RC002',2,'20001206','Active'),
(N'A栋1楼C3','1998DC001',4,'20001206','Active'),
(N'A栋1楼C2','1998RC001',8,'20001206','Active'),
(N'A栋3楼C5','2000RC001',1,'20120608','Active'),
(N'B栋4楼A2','1998RC001',3,'20000520','Active'),
(N'B栋1楼B2','1997RC030',4,'20001206','Range'),
(N'D栋1楼D1','1998RC001',1,'20101120','Lecture'),
(N'D栋1楼D1','2000RC011',1,'20101120','Lecture'),
(N'D栋1楼D1','1998RC021',1,'20101120','Lecture'),
(N'D栋1楼D1','1998RC001',2,'20101120','Lecture'),
(N'D栋1楼D1','2000RC011',2,'20101120','Lecture'),
(N'D栋1楼D1','1998RC021',2,'20101120','Lecture'),
(N'D栋1楼D1','1998RC001',3,'20101120','Lecture'),
(N'D栋1楼D1','2000RC011',3,'20101120','Lecture'),
(N'D栋1楼D1','1998RC021',3,'20101120','Lecture'),
(N'A栋5楼E4','1998RC021',1,'20010706','Active'),
(N'A栋5楼E4','1998RC021',2,'20010706','Active'),
(N'A栋5楼E4','1998RC021',3,'20010706','Active'),
(N'A栋6楼E4','1998RC021',4,'20010706','Active'),
(N'A栋1楼B2','1998RC008',5,'20010706','Active'),
(N'A栋1楼B2','1998RC010',6,'20010706','Active'),
(N'B栋1楼C2','2008RC001',1,'20101206','Active'),
(N'B栋1楼C2','2008RC001',2,'20101206','Active'),
(N'B栋1楼C2','2008RC009',3,'20101206','Active'),
(N'B栋1楼C2','2008RC009',4,'20101206','Active'),
(N'B栋1楼C2','2008RC008',5,'20101206','Active'),
(N'B栋1楼C2','2008RC008',6,'20101206','Active'),
(N'B栋1楼C2','2008RC008',7,'20101206','Active'),
(N'B栋1楼C2','2008RC008',8,'20101206','Active'),
(N'B栋1楼C2','2008RC008',1,'20110806','Active'),
(N'B栋1楼C2','2008RC008',2,'20110806','Active'),
(N'A栋1楼C3','2007DC001',1,'20091106','Active'),
(N'A栋1楼C3','2007DC001',2,'20091106','Active'),
(N'A栋1楼C3','2007DC007',3,'20100209','Active'),
(N'A栋1楼C3','2007DC007',4,'20100209','Active'),
(N'A栋5楼E4','2010CC009',1,'20101206','Active'),
(N'A栋5楼E4','2010CC009',2,'20101206','Active'),
(N'B栋1楼C2','2010CC011',3,'20130706','Active'),
(N'B栋1楼C2','2010CC011',4,'20130706','Active'),
(N'B栋1楼C2','2013FC002',7,'20140507','Active'),
(N'B栋1楼C2','2013FC002',8,'20140507','Active'),
(N'D栋1楼C2','2014FC002',1,'20140507','Active'),
(N'D栋1楼C2','2014FC002',2,'20140507','Active'),
(N'D栋1楼C2','2014FC003',3,'20140507','Active'),
(N'D栋1楼C2','2014FC003',4,'20140507','Active'),
(N'D栋1楼C2','2014FC004',5,'20140507','Active'),
(N'D栋1楼C2','2014FC004',6,'20140507','Active'),
(N'D栋1楼C2','2014FC005',7,'20140507','Active'),
(N'D栋1楼C2','2014FC005',8,'20140507','Active'),
(N'D栋1楼C2','2014FC006',9,'20140507','Active'),
(N'D栋1楼C2','2014FC006',10,'20140507','Active'),
(N'D栋1楼C2','2014FC006',3,'20140607','Active'),
(N'D栋1楼C2','2014FC006',4,'20140607','Active'),
(N'A栋1楼C2','2014FC005',7,'20140607','Active'),
(N'A栋1楼C2','2014FC005',8,'20140607','Active'),
(N'B栋1楼C2','2014FC005',9,'20140607','Active'),
(N'B栋1楼C2','2014FC005',10,'20140607','Active');




SQL Code


/*
  应有结果条数:22
  有多少班级,应该就有多少条数据
*/
SELECT DISTINCT ClassNumber FROM Room_Reservation 


/*
  这个问题的关键是如何找到符号问题的数据行

  第一个想法,按班级分组,再按日期降序,取第一个行,就OK了
*/
WITH A AS(
	SELECT ClassNumber,RoomNumber,OrderDate, RANK() OVER(PARTITION BY ClassNumber ORDER BY OrderDate DESC) AS RANK_1
	FROM Room_Reservation 
	GROUP BY ClassNumber,RoomNumber,OrderDate
) SELECT * FROM A WHERE RANK_1=1
/*
  查询结果条数:24
  应有结果条数:22
  经过排查找到,是由于数据中存在某一个班在同一天预定了不同的教室,导致结果出现差异
  如:
	ClassNumber	RoomNumber	OrderDate	RANK_
	2014FC006	B栋1楼C2	20140607	1
	2014FC006	D栋1楼C2	20140607	1
	2014FC006	D栋1楼C2	20140507	3
*/

WITH A AS(
	SELECT ClassNumber,RoomNumber,OrderDate, 
	RANK() OVER(PARTITION BY ClassNumber ORDER BY OrderDate DESC) AS RANK_1,
	RANK() OVER(PARTITION BY OrderDate ORDER BY RoomNumber DESC) AS RANK_2
	FROM Room_Reservation 
	GROUP BY ClassNumber,RoomNumber,OrderDate
) SELECT * FROM A WHERE RANK_1=1 AND RANK_2=1
/*
  查询结果条数:15
  应有结果条数:22
  又一次失败了,仔细查看后,意识到Rank_2的排序不是基于Rank_1的排序上再排序的,理所应当出现此类问题
*/
WITH A AS(
	SELECT ClassNumber,RoomNumber,OrderDate, 
	RANK() OVER(PARTITION BY ClassNumber ORDER BY OrderDate DESC) AS RANK_1,
	RANK() OVER(PARTITION BY ClassNumber,OrderDate ORDER BY RoomNumber DESC) AS RANK_2
	FROM Room_Reservation 
	GROUP BY ClassNumber,RoomNumber,OrderDate
) SELECT * FROM A WHERE RANK_1=1 AND RANK_2=1

/*
  查询结果条数:22
  应有结果条数:22
  数量上符合了标准,经过抽样比对后发现,数据结果是符合要求的
  同事在看完这个语句后,做出了更进,很厉害
*/
WITH A AS(
	SELECT ClassNumber,RoomNumber,OrderDate, 
	RANK() OVER(PARTITION BY ClassNumber ORDER BY OrderDate DESC,RoomNumber DESC) AS RANK_
	FROM Room_Reservation 
	GROUP BY ClassNumber,RoomNumber,OrderDate
) SELECT * FROM A WHERE RANK_=1


/*
  总结:1.在多元变量且变量间为多对多关系的数据抓起,请先抓住其中一个点,然后依此为锚(anchor)再抓取
        2.所谓的取数据除了数据间的串接,更多的是数据集的排序,筛选出自己想要的数据
		3.分组后的Rank一定要抓准那个锚
		4.人外有人,山外有山
*/