Cleaned up the 2025 temp stuff. There are 9 restaurants that don't have information and will need to be manually updated.

This commit is contained in:
2026-02-24 21:50:05 -08:00
parent ab9abdb53e
commit b455bae614
3 changed files with 20 additions and 133 deletions

View File

@@ -5807,7 +5807,7 @@
"desc": "Lady Rose Apple, pink peppercorn, thyme, crostini V Add house-made focaccia $5" "desc": "Lady Rose Apple, pink peppercorn, thyme, crostini V Add house-made focaccia $5"
}, },
{ {
"name": "Tavolàta", "name": "Salad",
"desc": "Little gem lettuce, chicory, pistachio, red onion, Calabrian vinaigrette, pecorino GF V" "desc": "Little gem lettuce, chicory, pistachio, red onion, Calabrian vinaigrette, pecorino GF V"
}, },
{ {
@@ -5825,12 +5825,23 @@
"desc": "Black pepper, butter, pecorino romano GFA" "desc": "Black pepper, butter, pecorino romano GFA"
}, },
{ {
"name": "Roasted", "name": "Pork Loin",
"desc": "Fig mostarda, radicchio, hazelnut bread crumb GFA" "desc": "Fig mostarda, radicchio, hazelnut bread crumb GFA"
} }
], ],
"Third Course": [ "Third Course": [
{
"name": "Zeppole",
"desc": "Lemon doughnuts, dark chocolate sauce V"
},
{
"name": "Tiramasu",
"desc": "Mascarpone, amaretto, espresso, lady finger V"
},
{
"name": "Sorbetto or Gelato",
"desc": "Salted shortbread cookie GFA V"
}
] ]
} }
} }

View File

@@ -1,123 +0,0 @@
# fix-tavolata.ps1
# Run this after the Wayback Machine rate limit resets (wait ~30 minutes after last run)
# Recovers tavolata's Third Course using the same-block parser strategy
# Locate the data file that lives in the same directory as this script and
# load it into objects that the rest of the script edits in place.
$scriptFile = $MyInvocation.MyCommand.Definition
$projectDir = Split-Path -Path $scriptFile -Parent
$jsonPath   = Join-Path -Path $projectDir -ChildPath '2025-restaurants.json'
$jsonText   = Get-Content -Path $jsonPath -Raw -Encoding UTF8
$data       = $jsonText | ConvertFrom-Json
# Decode-Html
# Decodes HTML character references in a text fragment and normalizes whitespace.
#   $str - text that may contain references such as &amp;, &#039; or &nbsp;.
# Returns the decoded text with every run of whitespace collapsed to a single
# space and both ends trimmed. A null or empty $str is returned unchanged.
function Decode-Html($str) {
    if (-not $str) { return $str }

    # Decode with the framework decoder instead of chained -replace calls: it
    # handles every named and numeric reference and decodes each one exactly
    # once. Replacing '&amp;' first (as a hand-rolled chain does) turns an
    # escaped entity such as '&amp;lt;' into '<' instead of the literal '&lt;'.
    $decoded = [System.Net.WebUtility]::HtmlDecode([string]$str)

    # .NET's \s includes the no-break space that &nbsp; decodes to, so those
    # are folded into ordinary single spaces here as well.
    return ($decoded -replace '\s+', ' ').Trim()
}
# Get-CleanText
# Converts an HTML fragment to plain text: each tag is replaced by one space
# and the remaining text is handed to Decode-Html.
#   $rawHtml - HTML fragment to strip.
# Returns whatever Decode-Html returns for the tag-free text.
function Get-CleanText($rawHtml) {
    $withoutTags = $rawHtml -replace '<[^>]+>', ' '
    return Decode-Html $withoutTags
}
# Test-DietaryTag
# Tells whether $str is, in its entirety, one of the marker tokens listed in
# the pattern below (GF, GFA, V, V+, DF, DFA, V:, 2025, Drink, V+A).
# The -match operator is case-insensitive, so 'gf' matches as well.
#   $str - candidate text.
# Returns the result of the -match test.
function Test-DietaryTag {
    param($str)

    $markerPattern = '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$'
    return ($str -match $markerPattern)
}
# Get-Dish
# Extracts one dish from the inner HTML of a single paragraph.
#   $pContent - HTML found between <p ...> and </p>.
# Returns a PSCustomObject with 'name' and 'desc' (both passed through
# Get-CleanText), or $null when no usable name is found.
# Three strategies are tried in order; a later one runs only when the earlier
# one did not return. The [regex]::Match calls are case-sensitive, so only
# lower-case <b>, <br> and <strong> tags are recognized by them.
function Get-Dish($pContent) {
$opts = [System.Text.RegularExpressions.RegexOptions]::Singleline
# Strategy 1: name is everything from the first <b> up to the first <br> after it.
$bWithBrM = [regex]::Match($pContent, '(?s)<b>(.*?)<br\s*/?>', $opts)
if ($bWithBrM.Success) {
$name = Get-CleanText $bWithBrM.Groups[1].Value
# Accept only 3-80 character names that are not a dietary tag and do not start
# with one to three letters followed by a colon (-notmatch is case-insensitive).
if ($name.Length -ge 3 -and $name.Length -le 80 -and -not (Test-DietaryTag $name) -and $name -notmatch '^[A-Z]{1,3}:') {
# The description is all of the paragraph that follows the matched <br>.
return [PSCustomObject]@{ name = $name; desc = Get-CleanText ($pContent.Substring($bWithBrM.Index + $bWithBrM.Length)) }
}
}
# Strategy 2: name is the content of the first <b>...</b> pair.
$bM = [regex]::Match($pContent, '(?s)<b>(.*?)</b>', $opts)
if ($bM.Success) {
$namePart = Get-CleanText $bM.Groups[1].Value
if ($namePart.Length -ge 3 -and -not (Test-DietaryTag $namePart)) {
$afterB = $pContent.Substring($bM.Index + $bM.Length)
# If the text after </b> reaches a <strong>...</strong> before any other tag,
# that text may be the second half of the name.
$sM2 = [regex]::Match($afterB, '(?s)^[^<]*<strong>(.*?)</strong>(.*)', $opts)
if ($sM2.Success) {
$p2 = Get-CleanText $sM2.Groups[1].Value
if (-not (Test-DietaryTag $p2) -and $p2.Length -ge 2) {
# Join both halves; the description is whatever follows </strong>.
return [PSCustomObject]@{ name = "$namePart $p2".Trim(); desc = Get-CleanText $sM2.Groups[2].Value }
}
}
# No usable <strong> continuation: everything after </b> is the description.
return [PSCustomObject]@{ name = $namePart; desc = Get-CleanText $afterB }
}
}
# Strategy 3: name is the content of the first <strong>...</strong> pair.
$sM = [regex]::Match($pContent, '(?s)<strong>(.*?)</strong>', $opts)
if ($sM.Success) {
$name = Get-CleanText $sM.Groups[1].Value
# Same validity rules as strategy 1, but a rejected name ends the search here.
if ($name.Length -lt 3 -or $name.Length -gt 80 -or (Test-DietaryTag $name) -or $name -match '^[A-Z]{1,3}:') { return $null }
$afterBr = ''
# Prefer the text after the first <br>; otherwise fall back to the text after </strong>.
if ($pContent -match '(?s)<br\s*/?>(.*?)$') { $afterBr = $matches[1] }
else { $am = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $opts); if ($am.Success) { $afterBr = $am.Groups[1].Value } }
return [PSCustomObject]@{ name = $name; desc = Get-CleanText $afterBr }
}
# Neither <b> nor <strong> produced a usable name.
return $null
}
# Get-Dishes
# Collects the dishes found in a block of course HTML.
#   $courseHtml - HTML containing zero or more <p>...</p> paragraphs.
# Each paragraph whose content contains <b> or <strong> is passed to Get-Dish;
# results that are non-null and have a non-empty name are kept.
# Returns the ArrayList itself (the unary comma stops PowerShell from
# unrolling it on output).
function Get-Dishes($courseHtml) {
    $singleLine = [System.Text.RegularExpressions.RegexOptions]::Singleline
    $found = New-Object System.Collections.ArrayList

    $paragraphs = [regex]::Matches($courseHtml, '(?s)<p[^>]*>(.*?)</p>', $singleLine)
    foreach ($paragraph in $paragraphs) {
        $inner = $paragraph.Groups[1].Value
        if ($inner -match '<b>|<strong>') {
            $dish = Get-Dish $inner
            if ($dish -and $dish.name) { [void]$found.Add($dish) }
        }
    }

    return ,$found
}
# Get-CourseBlock
# Returns the slice of page HTML that belongs to one course heading.
#   $html      - full page HTML.
#   $label     - heading text of the wanted course.
#   $nextLabel - heading text of the following course, or $null / empty.
# Lookup order:
#   1. With $nextLabel: the text between the first $label and the next
#      occurrence of $nextLabel.
#   2. Otherwise, or when that fails: within at most 8000 characters starting
#      at $label, the paragraphs that follow a closing h1-h3 tag up to the
#      next </div>.
#   3. Failing that: the content of an et_pb_text_inner container that does not
#      open with a heading.
# Returns '' when nothing matches.
function Get-CourseBlock($html, $label, $nextLabel) {
    $singleLine = [System.Text.RegularExpressions.RegexOptions]::Singleline

    if ($nextLabel) {
        $betweenPattern = [regex]::Escape($label) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')'
        $between = [regex]::Match($html, $betweenPattern, $singleLine)
        if ($between.Success) { return $between.Groups[1].Value }
    }

    $start = $html.IndexOf($label)
    if ($start -lt 0) { return '' }

    # Only a bounded window after the label is searched by the fallbacks.
    $window = $html.Substring($start, [Math]::Min(8000, $html.Length - $start))

    $afterHeading = [regex]::Match($window, '(?s)</h[123]>\s*(<p.+?)(?=</div>)', $singleLine)
    if ($afterHeading.Success -and $afterHeading.Groups[1].Value -match '<p') {
        return $afterHeading.Groups[1].Value
    }

    $innerText = [regex]::Match($window, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $singleLine)
    if ($innerText.Success) { return $innerText.Groups[1].Value }

    return ''
}
# --- Main -------------------------------------------------------------------
# Re-scrapes the tavolata page from Wayback Machine snapshots, trying each
# timestamp in order until one yields at least one Third Course dish. On
# success the entry's courses are replaced in $data (First/Second only when the
# snapshot produced dishes for them) and the whole data set is written back to
# $jsonPath. Nothing is written when no snapshot yields a Third Course.
$r = $data | Where-Object { $_.slug -eq 'tavolata' }
if (-not $r) {
    # Without this guard every snapshot would still be downloaded and each
    # attempt would then fail while assigning to the missing entry.
    Write-Host "No entry with slug 'tavolata' found in $jsonPath." -ForegroundColor Red
    return
}
Write-Host "tavolata currently: $($r.menu.courses.'First Course'.Count)/$($r.menu.courses.'Second Course'.Count)/$($r.menu.courses.'Third Course'.Count)"

$timestamps = @('20250306132630','20250401000000','20250415000000','20250501000000')
$success = $false
for ($i = 0; $i -lt $timestamps.Count; $i++) {
    $ts = $timestamps[$i]
    Write-Host "Trying timestamp $ts..." -NoNewline
    try {
        $url = "https://web.archive.org/web/$ts/https://inlanderrestaurantweek.com/project/tavolata/"
        $resp = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 60 -ErrorAction Stop
        $html = $resp.Content
        # A rate-limit page can arrive as ordinary content; treat it as a failure.
        if ($html -match '429 Too Many') { throw "Rate limited" }

        $first = Get-Dishes (Get-CourseBlock $html 'First Course' 'Second Course')
        $second = Get-Dishes (Get-CourseBlock $html 'Second Course' 'Third Course')
        $third = Get-Dishes (Get-CourseBlock $html 'Third Course' $null)
        Write-Host " -> $($first.Count)/$($second.Count)/$($third.Count)"

        if ($third.Count -gt 0) {
            # Keep the existing First/Second data when this snapshot parsed none.
            if ($first.Count -gt 0) { $r.menu.courses.'First Course' = @($first) }
            if ($second.Count -gt 0) { $r.menu.courses.'Second Course' = @($second) }
            $r.menu.courses.'Third Course' = @($third)
            Write-Host "SUCCESS! tavolata Third Course recovered." -ForegroundColor Green
            $success = $true
        } else {
            Write-Host " Third Course still empty, trying next timestamp..."
        }
    } catch {
        Write-Host " ERROR: $_" -ForegroundColor Red
    }

    if ($success) { break }
    # Pause only between requests: there is nothing to wait for after a
    # successful recovery or after the final timestamp.
    if ($i -lt $timestamps.Count - 1) { Start-Sleep -Seconds 10 }
}

if (-not $success) {
    Write-Host "Could not recover tavolata Third Course. Try again later." -ForegroundColor Yellow
} else {
    $json = $data | ConvertTo-Json -Depth 10
    [System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8)
    Write-Host "Saved to $jsonPath"
}

View File

@@ -35,15 +35,14 @@ Each entry in `YEAR-restaurants.json`:
``` ```
Price is always 25, 35, or 45. gardenparty genuinely has 4 Third Course options. Price is always 25, 35, or 45. gardenparty genuinely has 4 Third Course options.
## 2025 Data Status ## 2025 Data Status — COMPLETE
- **File**: `2025-restaurants.json` (121 restaurants) - **File**: `2025-restaurants.json` (121 restaurants)
- **Wayback snapshot used**: `20250306132630` (primary), `20250401000000` (backup for some) - **Wayback snapshot used**: `20250306132630`
- **Complete (3/3/3+)**: 111 restaurants - **Complete (3/3/3+)**: 112 restaurants
- **gardenparty**: 3/3/4 — correct, it genuinely offers 4 dessert choices - **gardenparty**: 3/3/4 — correct, it genuinely offers 4 dessert choices
- **tavolata**: 3/3/0 — needs fix-tavolata.ps1 run when rate limit resets - **tavolata**: 3/3/3 — FIXED (recovered Third Course from snapshot `20250306132630`)
- **0/0/0 (JS-only, unrecoverable)**: heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza - **0/0/0 (JS-only, unrecoverable)**: heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza
## Scripts in Project Directory ## Scripts in Project Directory
- `fix-tavolata.ps1` — run after rate limit resets to recover tavolata Third Course - `fix-tavolata.ps1` — already run, tavolata is complete; kept for reference
- Copy to local temp and run: `cp ...\fix-tavolata.ps1 C:\Users\derekc.CHNSLocal\AppData\Local\Temp\` - `check-2025.ps1` — validates all restaurant course counts
- Then: `powershell.exe -ExecutionPolicy Bypass -File C:\Users\derekc.CHNSLocal\AppData\Local\Temp\fix-tavolata.ps1`