First I created a powershell script to download the list and output it into a regex string like this:
Code: Select all
PubSufRegEx = "^ac$|^com\.ac$|^edu\.ac$|^gov\.ac$|^net\.ac$|^mil\.ac$|^org\.ac$|...REST OF LIST ABRIDGED...|^ad$"
The problem I'm having is getting the correct result. I'm split the domain into pieces then test each part against the regex string. This is the part that doesn't work. It seems to match anything. However, if I remove the "function" business and just run the code, then it does work. I'm obviously missing something stupid that I just can't put my finger on.
Here's my vbs test script:
Code: Select all
Option Explicit
Function Include(sInstFile)
Dim f, s, oFSO
Set oFSO = CreateObject("Scripting.FileSystemObject")
On Error Resume Next
If oFSO.FileExists(sInstFile) Then
Set f = oFSO.OpenTextFile(sInstFile)
s = f.ReadAll
f.Close
ExecuteGlobal s
End If
On Error Goto 0
End Function
Function Lookup(strRegEx, strMatch) : Lookup = False
With CreateObject("VBScript.RegExp")
.Pattern = strRegEx
.Global = False
.MultiLine = True
.IgnoreCase = True
If .Test(strMatch) Then Lookup = True
End With
End Function
Function oLookup(strRegEx, strMatch, bGlobal)
If strRegEx = "" Then strRegEx = StrReverse(strMatch)
With CreateObject("VBScript.RegExp")
.Pattern = strRegEx
.Global = bGlobal
.MultiLine = True
.IgnoreCase = True
Set oLookup = .Execute(strMatch)
End With
End Function
Function CheckDomain(strDomain)
Dim strRegEx, PubSufRegEx, Match, Matches
Dim TLDTopLevel, TLDSecondLevel, TLDThirdLevel, TLDFourthLevel, TLDFifthLevel, Domain
Include("C:\scripts\hmailserver\FWBan\PublicSuffix\public_suffix_list.vbs")
strRegEx = "[A-Za-z]+$"
Set Matches = oLookup(strRegEx, strDomain, False)
For Each Match In Matches
TLDTopLevel = LCase(Match.Value)
Next
strRegEx = "[A-Za-z0-9-]+\.[a-z]+$"
Set Matches = oLookup(strRegEx, strDomain, False)
For Each Match In Matches
TLDSecondLevel = LCase((Split(Match.Value, "."))(0))
Next
strRegEx = "[A-Za-z0-9-]+\.[A-Za-z0-9-]+\.[a-z]+$"
Set Matches = oLookup(strRegEx, strDomain, False)
For Each Match In Matches
TLDThirdLevel = LCase((Split(Match.Value, "."))(0))
Next
strRegEx = "[A-Za-z0-9-]+\.[A-Za-z0-9-]+\.[A-Za-z0-9-]+\.[a-z]+$"
Set Matches = oLookup(strRegEx, strDomain, False)
For Each Match In Matches
TLDFourthLevel = LCase((Split(Match.Value, "."))(0))
Next
strRegEx = "[A-Za-z0-9-]+\.[A-Za-z0-9-]+\.[A-Za-z0-9-]+\.[A-Za-z0-9-]+\.[a-z]+$"
Set Matches = oLookup(strRegEx, strDomain, False)
For Each Match In Matches
TLDFifthLevel = LCase((Split(Match.Value, "."))(0))
Next
If Lookup(PubSufRegEx, TLDTopLevel) Then
Domain = TLDSecondLevel & "." & TLDTopLevel
End If
If Lookup(PubSufRegEx, (TLDSecondLevel & "." & TLDTopLevel)) Then
Domain = TLDThirdLevel & "." & TLDSecondLevel & "." & TLDTopLevel
End If
If Lookup(PubSufRegEx, (TLDThirdLevel & "." & TLDSecondLevel & "." & TLDTopLevel)) Then
Domain = TLDFourthLevel & "." & TLDThirdLevel & "." & TLDSecondLevel & "." & TLDTopLevel
End If
If Lookup(PubSufRegEx, (TLDFourthLevel & "." & TLDThirdLevel & "." & TLDSecondLevel & "." & TLDTopLevel)) Then
Domain = TLDFifthLevel & "." & TLDFourthLevel & "." & TLDThirdLevel & "." & TLDSecondLevel & "." & TLDTopLevel
End If
CheckDomain = Domain
End Function
Dim strDomain, DomainName
' strDomain = "pool-72-76-221-125.nwrknj.fios.verizon.net"
' strDomain = "msnbot-207-46-13-192.search.msn.com"
' strDomain = "SKNcd-03p13-20.ppp11.odn.ad.jp"
' strDomain = "v150-95-128-207.a079.g.TyO1.STATIC.cnode.io"
' strDomain = "230.19.7.186.f.dyn.claro.net.do"
' strDomain = "hotelbrisamaral.static.gvt.net.br"
strDomain = "6thLevel.5thLevel.hotelbrisamaral.rs.gov.br"
WScript.Echo CheckDomain(strDomain)
And here is my powershell (working):
Code: Select all
<#
.SYNOPSIS
.DESCRIPTION
.FUNCTIONALITY
.NOTES
.EXAMPLE
#>
<# Set script variables #>
$URL = "https://publicsuffix.org/list/public_suffix_list.dat"
$PubSufFile = "$PSScriptRoot\public_suffix_list.dat"
$CondensedDatList = "$PSScriptRoot\public_suffix_list.vbs"
<# Download latest Public Suffix data #>
$LastDownloadTime = (Get-Item $PubSufFile).LastWriteTime
$HoursSinceLastDownload = [int](New-Timespan $LastDownloadTime).TotalHours
If ($HoursSinceLastDownload -gt 23){
Try {
Start-BitsTransfer -Source $URL -Destination $PubSufFile -ErrorAction Stop
}
Catch {
Write-Host "Error downloading Public Suffix List: `n$Error[0]"
Exit
}
}
<# Read data file and output list #>
Get-Content $PubSufFile | Where {((-not([string]::IsNullOrEmpty($_))) -and ($_ -notmatch "^//|^\*|^\!"))} | ForEach {
Write-Output "^$_$"
} | Out-File $CondensedDatList
<# Convert list to RegEx pattern #>
(Get-Content -Path $CondensedDatList) -Replace '$','|' | Set-Content -NoNewline -Path $CondensedDatList
(Get-Content -Path $CondensedDatList) -Replace '\.','\.' | Set-Content -NoNewline -Path $CondensedDatList
(Get-Content -Path $CondensedDatList) -Replace '^','PubSufRegEx = "' | Set-Content -NoNewline -Path $CondensedDatList
(Get-Content -Path $CondensedDatList) -Replace '\|$','' | Set-Content -NoNewline -Path $CondensedDatList
(Get-Content -Path $CondensedDatList) -Replace '$','"' | Set-Content -NoNewline -Path $CondensedDatList