Example: Index fragmentation with insert/updates, measuring it and fixing it

Published Mar 23 2019 05:14 AM 193 Views
Microsoft
First published on MSDN on Feb 29, 2008

This blog shows a very simple example to illustrate data fragmentation, measuring it, seeing behind the scene data and steps to address it. I recommend reading the overall series on index fragementation at

http://blogs.msdn.com/sqlserverstorageengine/archive/tags/Index+Fragmentation+Series/default.as...





-- TSQL Script


create database indextest


go




use indextest


go




-- create the index after loading the data


drop table t_ci


go



create table t_ci (c1 int, c2 char (100), c3 int, c4 varchar(1000))


go




-- load the data


declare @i int


select @i = 0


while (@i < 1000)


begin


insert into t_ci values (@i, 'hello', @i+10000, replicate ('a', 100))


set @i = @i + 1


end



-- find fragmentation


select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent


from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')



OUTPUT


-- main cause of fragmentation is that initial page allocation is using mixed extent


avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent


---------------------------- -------------------------- -------------------- ------------------------------


40                           7.25                       4                    94.127625401532



(1 row(s) affected)



--find extent allocations. Note, first 8 pages are allocated from mixed extents


-- in the output below, I removed some columns that are not relevant here.


dbcc extentinfo (0, 't_ci', 0)



OUTPUT


file_id     page_id     pg_alloc    ext_size    object_id   index_id    partition_number


----------- ----------- ----------- ----------- ----------- ----------- ----------------


1           153         1           1           2137058649  0           1


1           155         1           1           2137058649  0           1


1           156         1           1           2137058649  0           1


1           157         1           1           2137058649  0           1


1           158         1           1           2137058649  0           1


1           159         1           1           2137058649  0           1


1           180         1           1           2137058649  0           1


1           181         1           1           2137058649  0           1


1           208         8           8           2137058649  0           1


1           216         8           8           2137058649  0           1


1           224         5           8           2137058649  0           1



-- this shows all the pages alllocated to t_ci


-- I looked at the pages and I found total of 30 pages with 4 fragements. So the average size is 7.5


dbcc ind ('indextest', 't_ci', 0)




-- create the clustered index


create clustered index ci on t_ci(c1)


go



-- measure the fragementation


select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent


from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')




OUTPUT


-- you don't see any fragementation because SQL Server allocates uniform extents.


-- In the output below, I removed the row for non-leaf index pages


avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent


---------------------------- -------------------------- -------------------- ------------------------------


0                            28                         1                    98.3726958240672



-- this shows all the pages alllocated to t_ci


-- I looked at the pages and I found that logical/physical order was perfactly alinged.


-- also, all pages were allocated from uniform extents except for PFS page.


dbcc ind ('indextest', 't_ci', 1)



-- here is the output of page allocations


dbcc extentinfo (0, 't_ci', 1)


file_id     page_id     pg_alloc    ext_size    object_id   index_id    partition_number


----------- ----------- ----------- ----------- ----------- ----------- ----------------


1           183         1           1           2137058649  1           1


1           232         8           8           2137058649  1           1


1           240         8           8           2137058649  1           1


1           248         8           8           2137058649  1           1





-- create the index and load the data. This is different from the previous example


-- as here we create the index on the empty table and then load the data.



drop table t_ci


go



create table t_ci (c1 int, c2 char (100), c3 int, c4 varchar(1000))


go



-- create the clustered index


create clustered index ci on t_ci(c1)


go



-- load the data


declare @i int


select @i = 0


while (@i < 1000)


begin


insert into t_ci values (@i, 'hello', @i+10000, replicate ('a', 100))


set @i = @i + 1


end




-- measure the fragementation


select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent


from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')



-- output: again, you see the fragementaion becuase first 8 page allcoations come from mixed page extents


-- This is because when you are loading into empty table, SQL Server cannot predict its size. So


-- it does mixed page allocations. This is different when you create the indec


-- after loading the data because at that time it knows the size of the index.


-- Also note that the average used percentage in pages was close to 100%. This means that


-- the pages were not split in half. This is one of the optimizations SQL Server does when


-- loading ordered data.


avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent


---------------------------- -------------------------- -------------------- ------------------------------


14.2857142857143             5.6                        5                    98.3726958240672




--find extent allocations. Note, first 8 pages are allocated from mixed extents


dbcc extentinfo (0, 't_ci', 1)


file_id     page_id     pg_alloc    ext_size    object_id   index_id    partition_number


----------- ----------- ----------- ----------- ----------- ----------- ----------------


1           180         1           1           5575058     1           1


1           182         1           1           5575058     1           1


1           183         1           1           5575058     1           1


1           142         1           1           5575058     1           1


1           143         1           1           5575058     1           1


1           153         1           1           5575058     1           1


1           154         1           1           5575058     1           1


1           155         1           1           5575058     1           1


1           192         8           8           5575058     1           1


1           200         8           8           5575058     1           1


1           208         5           8           5575058     1           1



drop index ci on t_ci


-- create the clustered index to start with unfragmented data


create clustered index ci on t_ci(c1)


go



-- update all rows such that each row 900 bytes. This will cause page splits thereby


-- lead to fragmentation


update t_ci set c4 = replicate ('b', 1000)



-- measure the fragementation


select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent


from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')



-- Severe fragementation because of page splits. Also note that the space usage in each page is around 50%


-- This affects the performance in two ways (1) you will need to read more pages to get to the


-- same number of rows as pages are 1/2 full. (2) Since there are 117 fragements, it will cause


-- more physical IOs if we are scanning for a range of keys.


--


-- Just to emphasize, the fragmentation happens because of DML operations. So if there is very little


-- DML activity on a table, it may not get fragmented.  Other important point to understand


-- is that fragementaion ONLY impacts range queries but NOT singleton selects.So again if


-- you determine that most access thru an index are singleton SELECT, there is no performance benefit


-- in defragmenting it except of course you will be able to space by compacting the data on the page.


--


avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent


---------------------------- -------------------------- -------------------- ------------------------------


40.4332129963899             2.40869565217391           115                  50.0636520879664



--find extent allocations. Note, that the pages are allocated from mixed extents only. This


-- is because the table is already bigger than the 8 page threshold


dbcc extentinfo (0, 't_ci', 1)


file_id     page_id     pg_alloc    ext_size    object_id   index_id    partition_number


----------- ----------- ----------- ----------- ----------- ----------- ----------------


1           183         1           1           5575058     1           1


1           440         8           8           5575058     1           1


1           448         8           8           5575058     1           1


1           456         8           8           5575058     1           1


1           464         8           8           5575058     1           1


1           472         8           8           5575058     1           1


1           480         8           8           5575058     1           1


1           488         8           8           5575058     1           1


1           496         8           8           5575058     1           1


1           504         8           8           5575058     1           1


1           512         8           8           5575058     1           1


1           520         8           8           5575058     1           1


1           528         8           8           5575058     1           1


1           536         8           8           5575058     1           1


1           544         8           8           5575058     1           1


1           552         8           8           5575058     1           1


1           560         8           8           5575058     1           1


1           568         8           8           5575058     1           1


1           576         7           8           5575058     1           1




-- you can use the following command to show all the pages alllocated to t_ci and their links.


dbcc ind ('indextest', 't_ci', 1)



-- Now do an index defrag.


alter index ci on t_ci reorganize



-- measure the fragementation


select avg_fragmentation_in_percent, avg_fragment_size_in_pages, fragment_count, avg_page_space_used_in_percent


from sys.dm_db_index_physical_stats (DB_ID(), object_id('t_ci'), NULL, NULL, 'DETAILED')



-- note that the fragmentation was reduced significantly and the space usage on pages is around 100%. In other


-- words, the defragementation makes the physical order same as the logical order and compacts the pages.


-- The reason you see the fragementation is that the 'Reorganize' command does not allocate new extents


-- as index rebuild command. It works with the already allocated pages and moves the data around and then


-- deallocates pages not needed. If you choose to 'rebuild' the index, then the fragmentation will be completely


-- removed but you must question if it is worth it? Index Reorganize takes very little space (one extra page) and


-- breaks the work into smaller transactions. So you minimze large log chain and space overhead.On the flip side,


-- it is single threaded and fully logged operation.



avg_fragmentation_in_percent avg_fragment_size_in_pages fragment_count       avg_page_space_used_in_percent


---------------------------- -------------------------- -------------------- ------------------------------


3.4965034965035              10.2142857142857           14                   98.3726958240672


Version history
Last update:
‎Mar 23 2019 05:14 AM
Updated by: