Forum Discussion

SayantanGanguly's avatar
SayantanGanguly
Copper Contributor
Jun 27, 2023

Not getting all the documents from SHarePoint Site using Powershell

Hello, I am trying to get 95000 documents from sharepoint site and convert it into .csv by SharePoint Online Management Powershell.

I am not able to get all of the files.

Below is my code:

#Load SharePoint CSOM Assemblies
Add-Type -Path "C:\Program Files\SharePoint Online Management Shell\Microsoft.Online.SharePoint.PowerShell\Microsoft.SharePoint.Client.dll" 
Add-Type -Path "C:\Program Files\SharePoint Online Management Shell\Microsoft.Online.SharePoint.PowerShell\Microsoft.SharePoint.Client.Runtime.dll" 
Add-Type -Path "C:\Program Files\SharePoint Online Management Shell\Microsoft.Online.SharePoint.PowerShell\Microsoft.Online.SharePoint.Client.Tenant.dll" 
    
#Function to Generate Report on all documents in a SharePoint Online Site Collection
Function Get-SPODocumentInventory($SiteURL)
{
    Try {
        #Setup the context
        $Ctx = New-Object Microsoft.SharePoint.Client.ClientContext($SiteURL)
        $Ctx.Credentials = $Credentials
    
        #Get the web from given URL and its subsites
        $Web = $Ctx.web
        $Ctx.Load($Web)
        $Ctx.Load($Web.Lists)
        $Ctx.Load($web.Webs)
        $Ctx.executeQuery()
  
        #Arry to Skip System Lists and Libraries
        $SystemLists =@("Converted Forms", "Master Page Gallery", "Customized Reports", "Form Templates", "List Template Gallery", "Theme Gallery",
               "Reporting Templates", "Solution Gallery", "Style Library", "Web Part Gallery","Site Assets", "wfpub", "Site Pages", "Images")
      
        Write-host -f Yellow "Processing Site: $SiteURL"
  
        #Filter Document Libraries to Scan
        $Lists = $Web.Lists | Where {$_.BaseType -eq "DocumentLibrary" -and $_.Hidden -eq $false -and $SystemLists -notcontains $_.Title -and $_.ItemCount -gt 0}
        #Loop through each document library
        Foreach ($List in $Lists)
        {
            #Define CAML Query to Get List Items in batches
            $Query = New-Object Microsoft.SharePoint.Client.CamlQuery
            $Query.ViewXml ="
                <View Scope='RecursiveAll'>
                   <Query>
                      <OrderBy><FieldRef Name='ID' Ascending='TRUE'/></OrderBy>
                   </Query>
                   <RowLimit Paged='TRUE'>$BatchSize</RowLimit>
                </View>"
  
            Write-host -f Cyan "`t Processing Document Library: '$($List.Title)' with $($List.ItemCount) Item(s)"
  
            Do {
                #Get List items
                $ListItems = $List.GetItems($Query)
                $Ctx.Load($ListItems)
                $Ctx.ExecuteQuery()
 
                #Filter Files
                $Files = $ListItems | Where { $_.FileSystemObjectType -eq "File"}
 
                #Iterate through each file and get data
                $DocumentInventory = @()
                Foreach($Item in $Files)
                {
                    $File = $Item.File
                    $Ctx.Load($File)
                    $Ctx.ExecuteQuery()
  
                    $DocumentData = New-Object PSObject
                    $DocumentData | Add-Member NoteProperty SiteURL($SiteURL)
                    $DocumentData | Add-Member NoteProperty DocLibraryName($List.Title)
                    $DocumentData | Add-Member NoteProperty FileName($File.Name)
                    $DocumentData | Add-Member NoteProperty FileURL($File.ServerRelativeUrl)
                    $DocumentData | Add-Member NoteProperty CreatedBy($Item["Author"].Email)
                    $DocumentData | Add-Member NoteProperty CreatedOn($File.TimeCreated)
                    $DocumentData | Add-Member NoteProperty ModifiedBy($Item["Editor"].Email)
                    $DocumentData | Add-Member NoteProperty LastModifiedOn($File.TimeLastModified)
                    $DocumentData | Add-Member NoteProperty FileIcon($File.DocIcon)
    $DocumentData | Add-Member NoteProperty GUID($List.Id)
 
                        
                    #Add the result to an Array
                    $DocumentInventory += $DocumentData
                }
                #Export the result to CSV file
                $DocumentInventory | Export-CSV $ReportOutput -NoTypeInformation -Append
                $Query.ListItemCollectionPosition = $ListItems.ListItemCollectionPosition
            } While($Query.ListItemCollectionPosition -ne $null)
        }
           
        #Iterate through each subsite of the current web and call the function recursively
        ForEach ($Subweb in $Web.Webs)
        {
            #Call the function recursively to process all subsites underneaththe current web
            Get-SPODocumentInventory($Subweb.url)
        }
    }
    Catch {
        write-host -f Red "Error Generating CSV. Please contact with Administrator" $_.Exception.Message
    }
}
   
#Config Parameters
$ReportOutput="C:\temp\BubEx1.csv"
$BatchSize = 500
 
#Setup Credentials to connect
$Cred= Get-Credential
$Credentials = New-Object Microsoft.SharePoint.Client.SharePointOnlineCredentials($Cred.Username, $Cred.Password)
   
#Delete the Output Report, if exists
if (Test-Path $ReportOutput) { Remove-Item $ReportOutput }
   
#Call the function
Get-SPODocumentInventory $SiteCollURL
  • You need to make use of Pagingation, as you will be limited to 5000 items! My recommendation for this would be to actually use Power Automate to do this, It's alot easier

Resources