# ---------------------------------------------------------------------------
# Purpose (per the script's own header comment just below): re-fetch
# restaurants that have zero or partial course data, using the CDX API to find
# a Wayback Machine snapshot, then write the records back to
# '2025-restaurants.json' located next to this script.
#
# What the visible code does:
#   Decode-Html($str)
#       Returns a falsy $str unchanged; otherwise applies a chain of -replace
#       steps, collapses every whitespace run to a single space and trims.
#   Get-CleanText($rawHtml)
#       Replaces every <...> tag with a space, then runs Decode-Html and Trim.
#   Invoke-Dishes($courseHtml)
#       Scans the course HTML with Singleline regexes and collects
#       PSCustomObjects {name, desc} in an ArrayList. A candidate name is
#       skipped when it is shorter than 3 or longer than 80 characters, when it
#       matches ^(GF|GFA|V\+?|DF|V:|2025|Drink), or when it starts with one to
#       three letters followed by a colon. `return ,$dishes` wraps the list in a
#       one-element array so the pipeline does not unroll it.
#   Invoke-CourseBlock($html, $courseLabel, $nextLabel)
#       When $nextLabel is given, returns the text between the regex-escaped
#       $courseLabel and the next occurrence of $nextLabel. If that does not
#       match (or $nextLabel is empty) it takes a window of at most 6000
#       characters starting at $courseLabel; the rest of this fallback is not
#       visible in this copy.
#   Update loop (only its tail is visible)
#       Overwrites the three course arrays of $r.menu.courses from $best, fills
#       hours/phone only when the record has none, prints "No improvement
#       found" otherwise, reports exceptions as "CDX ERROR", and sleeps 500 ms
#       after the try/catch.
#   Final report
#       Prints "slug: c1/c2/c3" for every record whose three course counts are
#       not all exactly 3, then serialises $data with ConvertTo-Json -Depth 10
#       and writes it to $jsonPath via [System.IO.File]::WriteAllText.
#
# NOTE(review): statements here are separated by spaces instead of line breaks,
#   and the first physical line starts with '#', so as written PowerShell would
#   treat that whole line as a comment. This looks like lost line breaks -
#   confirm against the original file before running.
# NOTE(review): in Decode-Html the search and replacement texts are identical
#   ('&','&'  '<','<'  '>','>'  '"','"'  ' ',' ') and one pair (''',"'") is not
#   a balanced literal. Presumably the HTML entity names were decoded somewhere
#   in transit - TODO restore them from the original.
# NOTE(review): in Invoke-Dishes the patterns '' and '(.*?)' contain no tag
#   text. An empty pattern matches every string, so the `-notmatch ''` guard
#   can never fire, and a lazy '(.*?)' matches the empty string, which the
#   length check then rejects. Presumably tag literals were stripped - TODO
#   restore them from the original.
# NOTE(review): the text breaks off inside the Invoke-CourseBlock pattern that
#   begins 'et_pb_text_inner">(?!' and resumes inside a string ending in
#   "Updating with ...". The definitions of $best and $r and the opening of the
#   loop/try are not present in this copy.
# NOTE(review): -match is case-insensitive, so ^(GF|GFA|V\+?|DF|...) also
#   rejects any name that merely begins with "V", "GF" or "DF" in either case,
#   and ^[A-Z]{1,3}: also accepts lower-case letters - confirm this is intended.
# NOTE(review): in the final-report filter the first clause (all three counts
#   equal 0) is already implied by the second (any count not equal to 3).
# ---------------------------------------------------------------------------
# rescrape-missing.ps1 - Re-fetches 0-course and partial restaurants # using CDX API to find best available Wayback Machine snapshot $projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition $jsonPath = Join-Path $projectDir '2025-restaurants.json' $data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json function Decode-Html($str) { if (-not $str) { return $str } $s = $str -replace '&','&' -replace ''',"'" -replace '"','"' -replace '<','<' -replace '>','>' -replace ' ',' ' -replace '\s+',' ' $s.Trim() } function Get-CleanText($rawHtml) { $t = $rawHtml -replace '<[^>]+>', ' ' $t = Decode-Html $t $t.Trim() } function Invoke-Dishes($courseHtml) { $dishes = [System.Collections.ArrayList]@() $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline $pMatches = [regex]::Matches($courseHtml, ']*>(.*?)

', $opts) foreach ($pm in $pMatches) { $pContent = $pm.Groups[1].Value if ($pContent -notmatch '') { continue } $nameM = [regex]::Match($pContent, '(.*?)', $opts) if (-not $nameM.Success) { continue } $name = Get-CleanText $nameM.Groups[1].Value if ($name -match '^(GF|GFA|V\+?|DF|V:|2025|Drink)') { continue } if ($name.Length -lt 3 -or $name.Length -gt 80) { continue } if ($name -match '^[A-Z]{1,3}:') { continue } $afterBr = '' if ($pContent -match '(?s)(.*?)$') { $afterBr = $matches[1] } else { $afterStrong = [regex]::Match($pContent, '(?s)(.*?)$', $opts) if ($afterStrong.Success) { $afterBr = $afterStrong.Groups[1].Value } } $desc = Get-CleanText $afterBr $null = $dishes.Add([PSCustomObject]@{ name = $name; desc = $desc }) } return ,$dishes } function Invoke-CourseBlock($html, $courseLabel, $nextLabel) { $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline if ($nextLabel) { $pattern = [regex]::Escape($courseLabel) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')' $m = [regex]::Match($html, $pattern, $opts) if ($m.Success) { return $m.Groups[1].Value } } $idx = $html.IndexOf($courseLabel) if ($idx -ge 0) { $sub = $html.Substring($idx, [Math]::Min(6000, $html.Length - $idx)) $innerM = [regex]::Match($sub, '(?s)et_pb_text_inner">(?!\s*\s*\s*\s* Updating with $($best.first.Count)/$($best.second.Count)/$($best.third.Count) courses" $r.menu.courses.'First Course' = @($best.first) $r.menu.courses.'Second Course' = @($best.second) $r.menu.courses.'Third Course' = @($best.third) if ($best.hours -and -not $r.menu.hours) { $r.menu.hours = $best.hours } if ($best.phone -and -not $r.menu.phone) { $r.menu.phone = $best.phone } } } else { Write-Host " -> No improvement found" } } catch { Write-Host " CDX ERROR: $_" } Start-Sleep -Milliseconds 500 } # ---- Final report ---- Write-Host "" Write-Host "=== Final Status ===" $data | Where-Object { $c1 = $_.menu.courses.'First Course'.Count $c2 = $_.menu.courses.'Second Course'.Count $c3 = $_.menu.courses.'Third 
Course'.Count ($c1 -eq 0 -and $c2 -eq 0 -and $c3 -eq 0) -or ($c1 -ne 3 -or $c2 -ne 3 -or $c3 -ne 3) } | ForEach-Object { $c1 = $_.menu.courses.'First Course'.Count $c2 = $_.menu.courses.'Second Course'.Count $c3 = $_.menu.courses.'Third Course'.Count Write-Host " $($_.slug): $c1/$c2/$c3" } $json = $data | ConvertTo-Json -Depth 10 [System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8) Write-Host "" Write-Host "Saved to $jsonPath"