As their names denote, aggregate functions are used to summarize information across the whole data set or based on several attributes. The following queries exemplify the use of aggregates based on the tables created in a
previous post:
-- aggreates over the whole table
SELECT COUNT(*) NoRecords
, MIN(StartDate) StartDate
, MAX(EndDate) EndDate
, DateDiff(d, MIN(StartDate), MAX(EndDate)) [DiffDays]
FROM dbo.T_Allocations A
-- aggregates based on several several attributes
SELECT A.CourseId
, A.StudentId
, COUNT(*) NoRecords
, MIN(StartDate) StartDate
, MAX(EndDate) EndDate
, DateDiff(d, MIN(StartDate), MAX(EndDate)) [DiffDays]
FROM dbo.T_Allocations A
GROUP BY A.CourseId
, A.StudentId
Correlated subqueries can be of help either inside of a SELECT or an CROSS, respectively OUTER APPLY:
-- correlated subquery within a SELECT
SELECT *
, (SELECT COUNT(*) NoRecords
FROM dbo.T_Allocations A
WHERE C.CourseId = A.CourseId
GROUP BY A.CourseId) NoRecords
FROM dbo.T_Courses C
-- CROSS APPLY (correlated subquery)
SELECT *
, A.NoRecords
, A.StartDate
FROM dbo.T_Courses C
CROSS APPLY (
SELECT A.CourseId
, COUNT(*) NoRecords
, MIN(StartDate) StartDate
FROM dbo.T_Allocations A
WHERE C.CourseId = A.CourseId
GROUP BY A.CourseId
) A
ORDER BY C.CourseName
-- OUTER APPLY (correlated subquery)
SELECT *
, A.NoRecords
, A.StartDate
FROM dbo.T_Courses C
OUTER APPLY (
SELECT A.CourseId
, COUNT(*) NoRecords
, MIN(StartDate) StartDate
FROM dbo.T_Allocations A
WHERE C.CourseId = A.CourseId
GROUP BY A.CourseId
) A
ORDER BY C.CourseName
Aggregates are useful in a series of techniques like the listing of duplicates:
-- identifying duplicates via an aggregate
SELECT A.*
FROM dbo.T_Allocations A
JOIN (
SELECT A.CourseId
, A.StudentId
, COUNT(*) NoRecords
FROM dbo.T_Allocations A
GROUP BY A.CourseId
, A.StudentId
HAVING COUNT(*)>1
) DUP
ON A.CourseId = DUP.CourseId
AND A.StudentId = DUP.Studentid
ORDER BY A.CourseId
, A.StudentId
A similar technique can be used to remove the duplicates from the base table:
-- removing the duplicates except the last record
DELETE dbo.T_Allocations
FROM (
SELECT A.StudentId
, A.CourseId
, A.StartDate
, A.EndDate
, Max(A.AllocationId) AllocationId
FROM dbo.T_Allocations A
GROUP BY A.StudentId
, A.CourseId
, A.StartDate
, A.EndDate
HAVING count(*)>1
) A
WHERE dbo.T_Allocations.CourseId = A.CourseId
AND dbo.T_Allocations.StudentId = A.StudentId
AND dbo.T_Allocations.StartDate = A.StartDate
AND dbo.T_Allocations.EndDate = A.EndDate
AND dbo.T_Allocations.AllocationId <> A.AllocationId
When the number of values is fixed and unique within a grouping, one can use aggregate functions to display the values within a matrix:
-- Matrix display via aggregates
SELECT S.StudentId
, S.StudentName
, CASE WHEN A.Course1 = 1 THEN 'x' ELSE '-' END Course1
, CASE WHEN A.Course2 = 1 THEN 'x' ELSE '-' END Course2
, CASE WHEN A.Course3 = 1 THEN 'x' ELSE '-' END Course3
, CASE WHEN A.Course4 = 1 THEN 'x' ELSE '-' END Course4
, CASE WHEN A.Course5 = 1 THEN 'x' ELSE '-' END Course5
FROM dbo.T_Students S
LEFT JOIN (
SELECT A.StudentId
, Max(CASE WHEN A.CourseId = 1 THEN 1 ELSE 0 END) Course1
, Max(CASE WHEN A.CourseId = 2 THEN 1 ELSE 0 END) Course2
, Max(CASE WHEN A.CourseId = 3 THEN 1 ELSE 0 END) Course3
, Max(CASE WHEN A.CourseId = 4 THEN 1 ELSE 0 END) Course4
, Max(CASE WHEN A.CourseId = 5 THEN 1 ELSE 0 END) Course5
FROM dbo.T_Allocations A
GROUP BY A.StudentId
) A
ON S.StudentId = A.StudentId
ORDER BY S.StudentName
No comments:
Post a Comment