Cleaned up the 2025 temp stuff. There are 9 restaurants that don't have information and will need to be manually updated.
This commit is contained in:
@@ -5807,7 +5807,7 @@
|
|||||||
"desc": "Lady Rose Apple, pink peppercorn, thyme, crostini V – Add house-made focaccia $5"
|
"desc": "Lady Rose Apple, pink peppercorn, thyme, crostini V – Add house-made focaccia $5"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Tavolàta",
|
"name": "Salad",
|
||||||
"desc": "Little gem lettuce, chicory, pistachio, red onion, Calabrian vinaigrette, pecorino GF V"
|
"desc": "Little gem lettuce, chicory, pistachio, red onion, Calabrian vinaigrette, pecorino GF V"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -5825,12 +5825,23 @@
|
|||||||
"desc": "Black pepper, butter, pecorino romano GFA"
|
"desc": "Black pepper, butter, pecorino romano GFA"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Roasted",
|
"name": "Pork Loin",
|
||||||
"desc": "Fig mostarda, radicchio, hazelnut bread crumb GFA"
|
"desc": "Fig mostarda, radicchio, hazelnut bread crumb GFA"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"Third Course": [
|
"Third Course": [
|
||||||
|
{
|
||||||
|
"name": "Zeppole",
|
||||||
|
"desc": "Lemon doughnuts, dark chocolate sauce V"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Tiramasu",
|
||||||
|
"desc": "Mascarpone, amaretto, espresso, lady finger V"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Sorbetto or Gelato",
|
||||||
|
"desc": "Salted shortbread cookie GFA V"
|
||||||
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
123
fix-tavolata.ps1
123
fix-tavolata.ps1
@@ -1,123 +0,0 @@
|
|||||||
# fix-tavolata.ps1
|
|
||||||
# Run this after the Wayback Machine rate limit resets (wait ~30 minutes after last run)
|
|
||||||
# Recovers tavolata's Third Course using the same-block parser strategy
|
|
||||||
|
|
||||||
# Locate the data file that sits next to this script and load it as objects.
$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition
$jsonPath = Join-Path $projectDir '2025-restaurants.json'
# -Raw hands ConvertFrom-Json the whole file as one string (a single JSON document).
# -ErrorAction Stop aborts here if the file is missing or unreadable, instead of
# continuing with empty data and querying the Wayback Machine for nothing.
$data = Get-Content $jsonPath -Raw -Encoding UTF8 -ErrorAction Stop | ConvertFrom-Json
|
|
||||||
|
|
||||||
# Decode HTML entities in a text fragment and normalize its whitespace.
#   $str    - text that may contain HTML entities (&amp;, &#39;, &quot;, &lt;, &gt;, &nbsp;, ...).
#   returns - the decoded text with every whitespace run collapsed to a single space and
#             both ends trimmed; a null/empty input is returned unchanged.
function Decode-Html($str) {
    if (-not $str) { return $str }
    # The hand-written -replace chain had lost its entity names (every pattern read the same
    # as its replacement), so decoding is delegated to the .NET decoder. It decodes each
    # entity exactly once, which also keeps '&amp;lt;' from being double-decoded into '<'.
    $decoded = [System.Net.WebUtility]::HtmlDecode($str)
    # &nbsp; decodes to U+00A0; list it explicitly so it is folded into a plain space too.
    return ($decoded -replace '[\s\u00A0]+', ' ').Trim()
}
|
|
||||||
# Turn an HTML fragment into plain text.
#   $rawHtml - HTML markup.
#   returns  - the result of Decode-Html applied to the fragment with its tags removed.
function Get-CleanText($rawHtml) {
    # Each tag becomes a space (not nothing) so text on either side of a tag stays separated.
    $withoutTags = $rawHtml -replace '<[^>]+>', ' '
    Decode-Html $withoutTags
}
|
|
||||||
# Report whether a string consists solely of one of the known marker tokens
# (GF, GFA, V, V+, DF, DFA, V:, 2025, Drink, V+A) rather than a dish name.
#   $str    - candidate text.
#   returns - $true when the whole string is one of those tokens (-match ignores case).
function Test-DietaryTag($str) {
    $tagPattern = '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$'
    $str -match $tagPattern
}
|
|
||||||
|
|
||||||
# Extract one dish (name + description) from the inner HTML of a single <p> element.
# Three markup layouts are tried in order:
#   1. <b>Name<br>...            - text from <b> up to the first <br> is the name, the rest the description.
#   2. <b>Name</b>...            - optionally followed by a <strong> fragment that continues the name.
#   3. <strong>Name</strong>...  - description is the text after the first <br>, else after </strong>.
#   $pContent - inner HTML of the paragraph.
#   returns   - a PSCustomObject with 'name' and 'desc', or $null when no layout yields a usable name.
function Get-Dish($pContent) {
    $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline

    # Layout 1: bold text terminated by a line break.
    $bWithBrM = [regex]::Match($pContent, '(?s)<b>(.*?)<br\s*/?>', $opts)
    if ($bWithBrM.Success) {
        $name = Get-CleanText $bWithBrM.Groups[1].Value
        # Accept only 3-80 character names that are neither a bare tag nor a short "XX:" prefix;
        # otherwise fall through to the next layout.
        if ($name.Length -ge 3 -and $name.Length -le 80 -and -not (Test-DietaryTag $name) -and $name -notmatch '^[A-Z]{1,3}:') {
            return [PSCustomObject]@{ name = $name; desc = Get-CleanText ($pContent.Substring($bWithBrM.Index + $bWithBrM.Length)) }
        }
    }

    # Layout 2: a closed <b>...</b> element.
    $bM = [regex]::Match($pContent, '(?s)<b>(.*?)</b>', $opts)
    if ($bM.Success) {
        $namePart = Get-CleanText $bM.Groups[1].Value
        if ($namePart.Length -ge 3 -and -not (Test-DietaryTag $namePart)) {
            $afterB = $pContent.Substring($bM.Index + $bM.Length)
            # A <strong> that follows with no other tag in between is treated as the second
            # half of the name, unless it is only a tag token.
            $sM2 = [regex]::Match($afterB, '(?s)^[^<]*<strong>(.*?)</strong>(.*)', $opts)
            if ($sM2.Success) {
                $p2 = Get-CleanText $sM2.Groups[1].Value
                if (-not (Test-DietaryTag $p2) -and $p2.Length -ge 2) {
                    return [PSCustomObject]@{ name = "$namePart $p2".Trim(); desc = Get-CleanText $sM2.Groups[2].Value }
                }
            }
            return [PSCustomObject]@{ name = $namePart; desc = Get-CleanText $afterB }
        }
    }

    # Layout 3: name in <strong>...</strong>.
    $sM = [regex]::Match($pContent, '(?s)<strong>(.*?)</strong>', $opts)
    if ($sM.Success) {
        $name = Get-CleanText $sM.Groups[1].Value
        if ($name.Length -lt 3 -or $name.Length -gt 80 -or (Test-DietaryTag $name) -or $name -match '^[A-Z]{1,3}:') { return $null }
        $afterBr = ''
        # Prefer the text after the first <br>; without one, use whatever follows </strong>.
        if ($pContent -match '(?s)<br\s*/?>(.*?)$') { $afterBr = $matches[1] }
        else { $am = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $opts); if ($am.Success) { $afterBr = $am.Groups[1].Value } }
        return [PSCustomObject]@{ name = $name; desc = Get-CleanText $afterBr }
    }

    return $null
}
|
|
||||||
|
|
||||||
# Collect every dish found in the HTML of one course section.
#   $courseHtml - HTML containing the course's <p> elements.
#   returns     - an ArrayList of the objects produced by Get-Dish, in document order.
function Get-Dishes($courseHtml) {
    $dishes = [System.Collections.ArrayList]@()
    $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline
    foreach ($pm in [regex]::Matches($courseHtml, '(?s)<p[^>]*>(.*?)</p>', $opts)) {
        $pc = $pm.Groups[1].Value
        # Only paragraphs that contain bold/strong markup are handed to Get-Dish.
        if ($pc -notmatch '<b>|<strong>') { continue }
        $d = Get-Dish $pc
        # ArrayList.Add returns the new index; discard it so it does not leak into the output.
        if ($d -and $d.name) { $null = $dishes.Add($d) }
    }
    # The leading comma wraps the list so PowerShell returns it as one object instead of
    # unrolling it (callers read .Count on the result).
    return ,$dishes
}
|
|
||||||
|
|
||||||
# Return the HTML that belongs to one course section of a restaurant page.
#   $html      - full page HTML.
#   $label     - heading text of the wanted course (e.g. 'Third Course').
#   $nextLabel - heading text of the following course, or $null for the last course.
#   returns    - the section's HTML, or '' when the section cannot be located.
function Get-CourseBlock($html, $label, $nextLabel) {
    # Nothing to search; also avoids calling .IndexOf on a null value below.
    if (-not $html) { return '' }

    $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline

    # Strategy 1: take everything between this label and the next label.
    if ($nextLabel) {
        $m = [regex]::Match($html, ([regex]::Escape($label) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')'), $opts)
        if ($m.Success) { return $m.Groups[1].Value }
    }

    # Ordinal search, consistent with the regex match above (no culture-dependent comparison).
    $idx = $html.IndexOf($label, [System.StringComparison]::Ordinal)
    if ($idx -ge 0) {
        # Only the first 8000 characters after the label are inspected.
        $sub = $html.Substring($idx, [Math]::Min(8000, $html.Length - $idx))

        # Strategy 2: paragraphs that directly follow the closing heading tag, up to the next </div>.
        $sameDivM = [regex]::Match($sub, '(?s)</h[123]>\s*(<p.+?)(?=</div>)', $opts)
        if ($sameDivM.Success -and $sameDivM.Groups[1].Value -match '<p') { return $sameDivM.Groups[1].Value }

        # Strategy 3: the first 'et_pb_text_inner' block that does not itself start with a heading.
        $im = [regex]::Match($sub, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $opts)
        if ($im.Success) { return $im.Groups[1].Value }
    }

    return ''
}
|
|
||||||
|
|
||||||
# --- Main: recover tavolata's Third Course from archived snapshots of its page ---

# -First 1 guarantees a single object to assign to, even if the slug were duplicated.
$r = $data | Where-Object { $_.slug -eq 'tavolata' } | Select-Object -First 1
if (-not $r) {
    # Without the entry there is nothing to update, so do not spend Wayback requests.
    Write-Host "tavolata entry not found in $jsonPath; nothing to fix." -ForegroundColor Yellow
    return
}
Write-Host "tavolata currently: $($r.menu.courses.'First Course'.Count)/$($r.menu.courses.'Second Course'.Count)/$($r.menu.courses.'Third Course'.Count)"

# Snapshot timestamps to try, in order.
$timestamps = @('20250306132630','20250401000000','20250415000000','20250501000000')
$success = $false

for ($i = 0; $i -lt $timestamps.Count; $i++) {
    $ts = $timestamps[$i]
    Write-Host "Trying timestamp $ts..." -NoNewline
    try {
        $url = "https://web.archive.org/web/$ts/https://inlanderrestaurantweek.com/project/tavolata/"
        $resp = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 60 -ErrorAction Stop
        $html = $resp.Content
        # Treat a rate-limit page in the body as a failure of this attempt.
        if ($html -match '429 Too Many') { throw "Rate limited" }

        $first  = Get-Dishes (Get-CourseBlock $html 'First Course' 'Second Course')
        $second = Get-Dishes (Get-CourseBlock $html 'Second Course' 'Third Course')
        $third  = Get-Dishes (Get-CourseBlock $html 'Third Course' $null)

        Write-Host " -> $($first.Count)/$($second.Count)/$($third.Count)"

        if ($third.Count -gt 0) {
            # First/Second are only overwritten when the snapshot actually yielded dishes for them.
            if ($first.Count -gt 0)  { $r.menu.courses.'First Course'  = @($first) }
            if ($second.Count -gt 0) { $r.menu.courses.'Second Course' = @($second) }
            $r.menu.courses.'Third Course' = @($third)
            Write-Host "SUCCESS! tavolata Third Course recovered." -ForegroundColor Green
            $success = $true
        } else {
            Write-Host "  Third Course still empty, trying next timestamp..."
        }
    } catch {
        Write-Host " ERROR: $_" -ForegroundColor Red
    }

    if ($success) { break }
    # Pause between attempts only; there is nothing to wait for after the last one.
    if ($i -lt $timestamps.Count - 1) { Start-Sleep -Seconds 10 }
}

if (-not $success) {
    Write-Host "Could not recover tavolata Third Course. Try again later." -ForegroundColor Yellow
} else {
    $json = $data | ConvertTo-Json -Depth 10
    # NOTE(review): [System.Text.Encoding]::UTF8 writes a byte-order mark; confirm that every
    # consumer of the JSON file tolerates it.
    [System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8)
    Write-Host "Saved to $jsonPath"
}
|
|
||||||
@@ -35,15 +35,14 @@ Each entry in `YEAR-restaurants.json`:
|
|||||||
```
|
```
|
||||||
Price is always 25, 35, or 45. gardenparty genuinely has 4 Third Course options.
|
Price is always 25, 35, or 45. gardenparty genuinely has 4 Third Course options.
|
||||||
|
|
||||||
## 2025 Data Status
|
## 2025 Data Status — COMPLETE
|
||||||
- **File**: `2025-restaurants.json` (121 restaurants)
|
- **File**: `2025-restaurants.json` (121 restaurants)
|
||||||
- **Wayback snapshot used**: `20250306132630` (primary), `20250401000000` (backup for some)
|
- **Wayback snapshot used**: `20250306132630`
|
||||||
- **Complete (3/3/3+)**: 111 restaurants
|
- **Complete (3/3/3+)**: 112 restaurants
|
||||||
- **gardenparty**: 3/3/4 — correct, it genuinely offers 4 dessert choices
|
- **gardenparty**: 3/3/4 — correct, it genuinely offers 4 dessert choices
|
||||||
- **tavolata**: 3/3/0 — needs fix-tavolata.ps1 run when rate limit resets
|
- **tavolata**: 3/3/3 — FIXED (recovered Third Course from snapshot `20250306132630`)
|
||||||
- **0/0/0 (JS-only, unrecoverable)**: heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza
|
- **0/0/0 (JS-only, unrecoverable)**: heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza
|
||||||
|
|
||||||
## Scripts in Project Directory
|
## Scripts in Project Directory
|
||||||
- `fix-tavolata.ps1` — run after rate limit resets to recover tavolata Third Course
|
- `fix-tavolata.ps1` — already run, tavolata is complete; the script was removed in the 2025 temp cleanup (see git history if it is needed for reference)
|
||||||
- Copy to local temp and run: `cp ...\fix-tavolata.ps1 C:\Users\derekc.CHNSLocal\AppData\Local\Temp\`
|
- `check-2025.ps1` — validates all restaurant course counts
|
||||||
- Then: `powershell.exe -ExecutionPolicy Bypass -File C:\Users\derekc.CHNSLocal\AppData\Local\Temp\fix-tavolata.ps1`
|
|
||||||
|
|||||||
Reference in New Issue
Block a user