<#
.SYNOPSIS
    Crawls a website and writes an XML sitemap of the URLs it could fetch.

.DESCRIPTION
    Starting at BaseUrl, pages are fetched breadth-first. Links found in HTML
    responses are followed when they are http(s), on the same host as BaseUrl,
    not matched by ExcludePatterns, and no deeper than MaxDepth link hops from
    the start page. Every URL that was fetched successfully is written to
    OutputPath as a <url><loc> entry in the sitemaps.org 0.9 namespace.

.PARAMETER BaseUrl
    Absolute http or https URL at which the crawl starts.

.PARAMETER OutputPath
    File the sitemap is written to. A relative path is resolved against the
    current PowerShell location.

.PARAMETER MaxDepth
    Maximum number of link hops from BaseUrl that will be fetched.

.PARAMETER ExcludePatterns
    Wildcard patterns (-like syntax); any URL matching one of them is skipped.
#>
[CmdletBinding()]
param(
    [Parameter(Mandatory = $true)]
    [string]$BaseUrl,

    [string]$OutputPath = "sitemap.xml",

    [int]$MaxDepth = 3,

    [string[]]$ExcludePatterns = @()
)

# Resolves $Relative against $Base and returns the absolute URL with any
# "#fragment" removed, or $null when the pair cannot be combined into a URI.
function Get-AbsoluteUrl {
    param(
        [string]$Base,
        [string]$Relative
    )

    try {
        $uri = [System.Uri]::new([System.Uri]$Base, $Relative)
        # Fragments address a position inside a page, not a separate page, so
        # they are dropped to keep one page from being crawled and listed twice.
        return $uri.GetLeftPart([System.UriPartial]::Query)
    } catch {
        return $null
    }
}

$baseUri = [System.Uri]$BaseUrl
if (-not $baseUri.IsAbsoluteUri -or $baseUri.Scheme -notin @('http', 'https')) {
    throw "BaseUrl must be an absolute http or https URL, got '$BaseUrl'."
}

$hostName = $baseUri.Host

# Use the same canonical form that resolved links get, so that e.g.
# "http://host" and a link resolving to "http://host/" are one entry.
$startUrl = $baseUri.GetLeftPart([System.UriPartial]::Query)

# URLs that have already been attempted (used only for de-duplication).
$visited = [System.Collections.Generic.HashSet[string]]::new()

# URLs that were fetched successfully, in crawl order; these form the sitemap.
$sitemapUrls = [System.Collections.Generic.List[string]]::new()

# Returns $true when $Url matches any of the script's -ExcludePatterns.
function Should-Exclude {
    param([string]$Url)

    foreach ($pattern in $ExcludePatterns) {
        if ($Url -like $pattern) { return $true }
    }
    return $false
}

# Crawls breadth-first from $Url, treating it as being at depth $Depth.
# Breadth-first order means each page is first reached at its smallest depth,
# so the MaxDepth cut-off does not depend on the order of links in a page.
# Successfully fetched URLs are appended to $sitemapUrls.
function Crawl {
    param(
        [string]$Url,
        [int]$Depth
    )

    $pending = [System.Collections.Generic.Queue[object]]::new()
    $pending.Enqueue(@{ Url = $Url; Depth = $Depth })

    while ($pending.Count -gt 0) {
        $item = $pending.Dequeue()
        $currentUrl = $item.Url
        $currentDepth = $item.Depth

        if ($currentDepth -gt $MaxDepth) { continue }
        if (Should-Exclude $currentUrl) { continue }
        # HashSet.Add returns $false when the URL was already attempted.
        if (-not $visited.Add($currentUrl)) { continue }

        try {
            $response = Invoke-WebRequest -Uri $currentUrl -UseBasicParsing -ErrorAction Stop
        } catch {
            # Best effort: an unreachable page is skipped and left out of the sitemap.
            Write-Verbose "Skipping '$currentUrl': $($_.Exception.Message)"
            continue
        }

        $sitemapUrls.Add($currentUrl)

        # The header value may be a single string or a collection of strings
        # depending on the PowerShell version; join it so both are handled.
        $contentType = @($response.Headers["Content-Type"]) -join ';'
        if ($contentType -notmatch "text/html") { continue }

        # Children would exceed the depth limit, so there is no need to queue them.
        if (($currentDepth + 1) -gt $MaxDepth) { continue }

        $links = $response.Links | Where-Object { $_.href } | ForEach-Object { $_.href }
        foreach ($link in $links) {
            $abs = Get-AbsoluteUrl -Base $currentUrl -Relative $link
            if (-not $abs) { continue }

            $linkUri = [System.Uri]$abs
            # Only web pages can be fetched; this also rejects mailto: links,
            # whose Host can equal the site's host name.
            if ($linkUri.Scheme -notin @('http', 'https')) { continue }
            if ($linkUri.Host -ne $hostName) { continue }
            if ($visited.Contains($abs)) { continue }

            $pending.Enqueue(@{ Url = $abs; Depth = $currentDepth + 1 })
        }
    }
}

Crawl -Url $startUrl -Depth 0

# Build the sitemap document.
$ns = "http://www.sitemaps.org/schemas/sitemap/0.9"
$xmlDoc = New-Object System.Xml.XmlDocument
$root = $xmlDoc.CreateElement("urlset", $ns)
$xmlDoc.AppendChild($root) | Out-Null

foreach ($url in $sitemapUrls) {
    $urlElem = $xmlDoc.CreateElement("url", $ns)
    $loc = $xmlDoc.CreateElement("loc", $ns)
    $loc.InnerText = $url
    $urlElem.AppendChild($loc) | Out-Null
    $root.AppendChild($urlElem) | Out-Null
}

$xmlWriterSettings = New-Object System.Xml.XmlWriterSettings
$xmlWriterSettings.Indent = $true
$xmlWriterSettings.Encoding = [System.Text.Encoding]::UTF8

# .NET resolves relative paths against the process working directory, which can
# differ from the PowerShell location, so resolve the path explicitly first.
$resolvedOutputPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($OutputPath)

$writer = [System.Xml.XmlWriter]::Create($resolvedOutputPath, $xmlWriterSettings)
try {
    $xmlDoc.WriteTo($writer)
    $writer.Flush()
} finally {
    # Release the file handle even when writing fails.
    $writer.Dispose()
}

Write-Host "Sitemap generated at '$OutputPath' with $($sitemapUrls.Count) URLs."