# fix-tavolata.ps1
# Run this after the Wayback Machine rate limit resets (wait ~30 minutes after last run)
# Recovers tavolata's Third Course using the same-block parser strategy
# Folder containing this script; the data file is expected to sit next to it.
$projectDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent
# Full path of the restaurant data file.
$jsonPath = Join-Path -Path $projectDir -ChildPath '2025-restaurants.json'
# Read the whole file as one UTF-8 string (-Raw) and parse it into objects.
$data = Get-Content -Path $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json
# Decodes the small set of HTML entities this script cares about and
# normalises whitespace.
#
# Parameters:
#   $str - text that may contain &lt; &gt; &quot; &#39; / &apos; &nbsp; &amp;
# Returns:
#   $str unchanged when it is $null, empty or otherwise falsy; else the decoded
#   text with every whitespace run collapsed to one space and the ends trimmed.
function Decode-Html($str) {
    if (-not $str) { return $str }

    # The patterns must be the entity *names*; matching the already-decoded
    # characters would make every replacement a no-op.
    $text = $str -replace '&lt;', '<'
    $text = $text -replace '&gt;', '>'
    $text = $text -replace '&quot;', '"'
    $text = $text -replace '&#39;|&apos;', "'"
    $text = $text -replace '&nbsp;', ' '
    # &amp; goes last so that an escaped entity such as "&amp;lt;" decodes to the
    # literal text "&lt;" instead of being decoded a second time into "<".
    $text = $text -replace '&amp;', '&'

    ($text -replace '\s+', ' ').Trim()
}
# Turns an HTML fragment into plain text: every tag is replaced by a single
# space, then the result is passed through Decode-Html.
function Get-CleanText($rawHtml) {
    $withoutTags = $rawHtml -replace '<[^>]+>', ' '
    Decode-Html $withoutTags
}
# Returns $true when $str is, in its entirety, one of the short marker tokens
# (GF, GFA, V, V+, DF, DFA, V:, 2025, Drink, V+A) rather than a dish name.
# -match is case-insensitive, so lower-case variants are accepted as well.
function Test-DietaryTag($str) {
    $tagPattern = '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$'
    return ($str -match $tagPattern)
}
# Extracts one dish from a fragment of HTML ($pContent).
# Returns a PSCustomObject with 'name' and 'desc' (both passed through
# Get-CleanText), or $null when no acceptable name is found.
# Three strategies are tried in order; the first that yields a name wins.
#
# NOTE(review): several regex literals below look like they have lost the markup
# they were anchored on (bare '(?s)(.*?)' patterns, literals that contain a raw
# line break). Presumably the originals matched specific HTML tags - confirm
# against version control and restore them before relying on this function.
function Get-Dish($pContent) {
# Singleline is also requested inline via (?s) in every pattern below.
$opts = [System.Text.RegularExpressions.RegexOptions]::Singleline
# Strategy 1: capture group 1 is the candidate name; whatever follows the whole
# match becomes the description.
$bWithBrM = [regex]::Match($pContent, '(?s)(.*?)
', $opts)
if ($bWithBrM.Success) {
$name = Get-CleanText $bWithBrM.Groups[1].Value
# Accept only 3-80 characters, not a dietary tag, and not starting with a
# 1-3 letter prefix followed by ':' (-notmatch is case-insensitive).
if ($name.Length -ge 3 -and $name.Length -le 80 -and -not (Test-DietaryTag $name) -and $name -notmatch '^[A-Z]{1,3}:') {
return [PSCustomObject]@{ name = $name; desc = Get-CleanText ($pContent.Substring($bWithBrM.Index + $bWithBrM.Length)) }
}
}
# Strategy 2: group 1 is the first part of the name (at least 3 characters, not
# a dietary tag; no upper length limit here).
$bM = [regex]::Match($pContent, '(?s)(.*?)', $opts)
if ($bM.Success) {
$namePart = Get-CleanText $bM.Groups[1].Value
if ($namePart.Length -ge 3 -and -not (Test-DietaryTag $namePart)) {
$afterB = $pContent.Substring($bM.Index + $bM.Length)
# Look in the remainder for a second name fragment (group 1) followed by the
# description (group 2).
$sM2 = [regex]::Match($afterB, '(?s)^[^<]*(.*?)(.*)', $opts)
if ($sM2.Success) {
$p2 = Get-CleanText $sM2.Groups[1].Value
# Append the fragment to the name only if it is not a dietary tag and has
# at least 2 characters.
if (-not (Test-DietaryTag $p2) -and $p2.Length -ge 2) {
return [PSCustomObject]@{ name = "$namePart $p2".Trim(); desc = Get-CleanText $sM2.Groups[2].Value }
}
}
# No usable second fragment: everything after the match is the description.
return [PSCustomObject]@{ name = $namePart; desc = Get-CleanText $afterB }
}
}
# Strategy 3: same acceptance checks as strategy 1, but a rejected name ends the
# search immediately with $null.
$sM = [regex]::Match($pContent, '(?s)(.*?)', $opts)
if ($sM.Success) {
$name = Get-CleanText $sM.Groups[1].Value
if ($name.Length -lt 3 -or $name.Length -gt 80 -or (Test-DietaryTag $name) -or $name -match '^[A-Z]{1,3}:') { return $null }
# Description source: group 1 of the -match below when it succeeds, otherwise
# group 1 of the fallback regex; empty string if neither matches.
$afterBr = ''
if ($pContent -match '(?s)
(.*?)$') { $afterBr = $matches[1] }
else { $am = [regex]::Match($pContent, '(?s)(.*?)$', $opts); if ($am.Success) { $afterBr = $am.Groups[1].Value } }
return [PSCustomObject]@{ name = $name; desc = Get-CleanText $afterBr }
}
# None of the strategies matched.
return $null
}
# Collects every dish found in a course's HTML ($courseHtml).
# Each regex match over $courseHtml contributes its capture group 1, which is
# handed to Get-Dish; results that are non-null and have a non-empty 'name' are
# added to an ArrayList. The list is returned with the unary comma operator
# (',$dishes') so PowerShell hands it back as one object instead of unrolling it.
#
# NOTE(review): the statements after the foreach header sit on one physical line
# without separators, and the regex / -notmatch literals look truncated
# (presumably they originally contained HTML tags) - confirm against version
# control. The end of that same line begins Get-CourseBlock, which continues
# beyond this excerpt and is left exactly as found.
function Get-Dishes($courseHtml) {
$dishes = [System.Collections.ArrayList]@()
$opts = [System.Text.RegularExpressions.RegexOptions]::Singleline
foreach ($pm in [regex]::Matches($courseHtml, '(?s)
]*>(.*?)
', $opts)) { $pc = $pm.Groups[1].Value if ($pc -notmatch '|') { continue } $d = Get-Dish $pc if ($d -and $d.name) { $null = $dishes.Add($d) } } return ,$dishes } function Get-CourseBlock($html, $label, $nextLabel) { $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline if ($nextLabel) { $m = [regex]::Match($html, ([regex]::Escape($label) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')'), $opts) if ($m.Success) { return $m.Groups[1].Value } } $idx = $html.IndexOf($label) if ($idx -ge 0) { $sub = $html.Substring($idx, [Math]::Min(8000, $html.Length - $idx)) $sameDivM = [regex]::Match($sub, '(?s)\s*((?!