# NOTE(review): this copy of the script appears to be damaged and probably does
# not parse as PowerShell as it stands -- confirm against the original before
# running or editing it:
#   * The statements that follow the header comment sit on the same physical
#     line as its leading '#', so that whole line is read as a comment.
#   * Decode-Html replaces '&' with '&', '<' with '<', '>' with '>', etc.
#     These look like HTML entity names that were lost (presumably &amp;,
#     &lt;, &gt;, &quot;, &nbsp; and an apostrophe entity) -- TODO confirm.
#   * Several regex literals in Get-Dish, Get-Dishes and Get-CourseBlock
#     (for example '(?s)(.*?)' and the -notmatch '|' test) look like they
#     lost HTML tag literals -- TODO confirm the intended patterns.
#   * Get-CourseBlock runs straight from a '(?!\s*...' pattern into the
#     "$($first.Count)/$($second.Count)/$($third.Count)" status output; the
#     text in between (the rest of that function and whatever defines $r,
#     $first, $second, $third and $success) seems to be missing.
#
# What the visible code does:
#   Decode-Html     - returns a falsy argument unchanged; otherwise applies the
#                     -replace chain, collapses whitespace runs ('\s+') to a
#                     single space and trims the result.
#   Get-CleanText   - replaces every '<[^>]+>' match with a space, then passes
#                     the result to Decode-Html.
#   Test-DietaryTag - tests the string against the anchored alternation
#                     GF | GFA | V / V+ | DF | DFA | V: | 2025 | Drink | V+A.
#   Get-Dish        - tries three [regex]::Match attempts in turn on the given
#                     content and returns a PSCustomObject with 'name' and
#                     'desc', or $null. Candidate names are rejected when they
#                     are shorter than 3 characters or are dietary tags; the
#                     first and third attempts also reject names longer than
#                     80 characters or matching '^[A-Z]{1,3}:'.
#   Get-Dishes      - runs Get-Dish on capture group 1 of each regex match in
#                     $courseHtml and collects results that have a name into
#                     an ArrayList; 'return ,$dishes' uses the unary comma so
#                     the list is returned as one object instead of unrolled.
#   Get-CourseBlock - when $nextLabel is given, first tries a lazy match of the
#                     text between the escaped $label and the escaped
#                     $nextLabel; otherwise (or if that fails) takes up to 8000
#                     characters starting at the first occurrence of $label.
#                     The remainder of the function is not visible here.
#   Tail            - when $third is non-empty, stores the non-empty First and
#                     Second Course lists plus the Third Course list under
#                     $r.menu.courses and sets $success; sleeps 10 seconds
#                     after each attempt (presumably inside a loop over
#                     snapshot timestamps -- the loop header is not visible).
#                     On success, serialises $data with ConvertTo-Json
#                     -Depth 10 and writes it to $jsonPath as UTF-8.
# fix-tavolata.ps1 # Run this after the Wayback Machine rate limit resets (wait ~30 minutes after last run) # Recovers tavolata's Third Course using the same-block parser strategy $projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition $jsonPath = Join-Path $projectDir '2025-restaurants.json' $data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json function Decode-Html($str) { if (-not $str) { return $str } ($str -replace '&','&' -replace ''',"'" -replace '"','"' -replace '<','<' -replace '>','>' -replace ' ',' ' -replace '\s+',' ').Trim() } function Get-CleanText($rawHtml) { Decode-Html ($rawHtml -replace '<[^>]+>', ' ') } function Test-DietaryTag($str) { $str -match '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$' } function Get-Dish($pContent) { $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline $bWithBrM = [regex]::Match($pContent, '(?s)(.*?)', $opts) if ($bWithBrM.Success) { $name = Get-CleanText $bWithBrM.Groups[1].Value if ($name.Length -ge 3 -and $name.Length -le 80 -and -not (Test-DietaryTag $name) -and $name -notmatch '^[A-Z]{1,3}:') { return [PSCustomObject]@{ name = $name; desc = Get-CleanText ($pContent.Substring($bWithBrM.Index + $bWithBrM.Length)) } } } $bM = [regex]::Match($pContent, '(?s)(.*?)', $opts) if ($bM.Success) { $namePart = Get-CleanText $bM.Groups[1].Value if ($namePart.Length -ge 3 -and -not (Test-DietaryTag $namePart)) { $afterB = $pContent.Substring($bM.Index + $bM.Length) $sM2 = [regex]::Match($afterB, '(?s)^[^<]*(.*?)(.*)', $opts) if ($sM2.Success) { $p2 = Get-CleanText $sM2.Groups[1].Value if (-not (Test-DietaryTag $p2) -and $p2.Length -ge 2) { return [PSCustomObject]@{ name = "$namePart $p2".Trim(); desc = Get-CleanText $sM2.Groups[2].Value } } } return [PSCustomObject]@{ name = $namePart; desc = Get-CleanText $afterB } } } $sM = [regex]::Match($pContent, '(?s)(.*?)', $opts) if ($sM.Success) { $name = Get-CleanText $sM.Groups[1].Value if ($name.Length -lt 3 -or $name.Length -gt 80 -or 
(Test-DietaryTag $name) -or $name -match '^[A-Z]{1,3}:') { return $null } $afterBr = '' if ($pContent -match '(?s)(.*?)$') { $afterBr = $matches[1] } else { $am = [regex]::Match($pContent, '(?s)(.*?)$', $opts); if ($am.Success) { $afterBr = $am.Groups[1].Value } } return [PSCustomObject]@{ name = $name; desc = Get-CleanText $afterBr } } return $null } function Get-Dishes($courseHtml) { $dishes = [System.Collections.ArrayList]@() $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline foreach ($pm in [regex]::Matches($courseHtml, '(?s)]*>(.*?)

', $opts)) { $pc = $pm.Groups[1].Value if ($pc -notmatch '|') { continue } $d = Get-Dish $pc if ($d -and $d.name) { $null = $dishes.Add($d) } } return ,$dishes } function Get-CourseBlock($html, $label, $nextLabel) { $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline if ($nextLabel) { $m = [regex]::Match($html, ([regex]::Escape($label) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')'), $opts) if ($m.Success) { return $m.Groups[1].Value } } $idx = $html.IndexOf($label) if ($idx -ge 0) { $sub = $html.Substring($idx, [Math]::Min(8000, $html.Length - $idx)) $sameDivM = [regex]::Match($sub, '(?s)\s*()', $opts) if ($sameDivM.Success -and $sameDivM.Groups[1].Value -match '(?!\s*\s*\s*\s* $($first.Count)/$($second.Count)/$($third.Count)" if ($third.Count -gt 0) { if ($first.Count -gt 0) { $r.menu.courses.'First Course' = @($first) } if ($second.Count -gt 0) { $r.menu.courses.'Second Course' = @($second) } $r.menu.courses.'Third Course' = @($third) Write-Host "SUCCESS! tavolata Third Course recovered." -ForegroundColor Green $success = $true } else { Write-Host " Third Course still empty, trying next timestamp..." } } catch { Write-Host " ERROR: $_" -ForegroundColor Red } Start-Sleep -Seconds 10 } if (-not $success) { Write-Host "Could not recover tavolata Third Course. Try again later." -ForegroundColor Yellow } else { $json = $data | ConvertTo-Json -Depth 10 [System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8) Write-Host "Saved to $jsonPath" }