Set up the 2025 files and started parsing the archive site, but was rate-limited. Will need to finish it in the future.
This commit is contained in:
@@ -14,15 +14,45 @@
|
||||
"courses": {
|
||||
"First Course": [
|
||||
{
|
||||
"name": "Quesadilla",
|
||||
"desc": "Braised short rib, pepperjack cheese, black bean corn salsa, chipotle crema, queso fresco, pico de gallo"
|
||||
"name": "Short Rib",
|
||||
"desc": "Quesadilla Braised short rib, pepperjack cheese, black bean corn salsa, chipotle crema, queso fresco, pico de gallo"
|
||||
},
|
||||
{
|
||||
"name": "Ahi Tuna Crudo",
|
||||
"desc": "Cucumber mignonette, shallots, garlic oil, wonton crisps, watermelon radishes"
|
||||
},
|
||||
{
|
||||
"name": "Kale and Brussels Sprouts Salad",
|
||||
"desc": "Chopped kale, shredded Brussels sprouts, grated"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Southwestern Sirloin",
|
||||
"desc": "Marinated top sirloin, sweet potato hash, chimichurri, fried sweet potato crisps, asparagus GF"
|
||||
},
|
||||
{
|
||||
"name": "Mediterranean Chicken",
|
||||
"desc": "Pan-seared French cut chicken breast, saffron rice, squash medley, lemon garlic aioli, tomato-shallot relish, balsamic glaze"
|
||||
},
|
||||
{
|
||||
"name": "Cajun Jambalaya Pasta",
|
||||
"desc": "Chicken, andouille sausage, creamy Cajun sauce, okra, penne, garlic bread"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Strawberry Bliss",
|
||||
"desc": "Yellow cake stacked with strawberry compote, vanilla custard, basil strawberry coulis, vanilla anglaise"
|
||||
},
|
||||
{
|
||||
"name": "Chocolate Peanut Butter Cake",
|
||||
"desc": "Chocolate and peanut butter layer cake, Irish cream chocolate ganache"
|
||||
},
|
||||
{
|
||||
"name": "Crème Brulée",
|
||||
"desc": "House-made crème brulée, fresh mixed berries GF"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -505,16 +535,49 @@
|
||||
"url": "https://inlanderrestaurantweek.com/project/bardenay/",
|
||||
"menu": {
|
||||
"hours": "Menu served Sun-Thu, 5-9 pm; Fri-Sat, 5-10 pm",
|
||||
"phone": "",
|
||||
"phone": "(208) 765-1540",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Corn Panna Cotta",
|
||||
"desc": "Savory roasted corn custard, cornbread, popcorn, avocado crema, tajin, cotija, and chives V"
|
||||
},
|
||||
{
|
||||
"name": "Steak \u0026 Potato Canapés",
|
||||
"desc": "Charbroiled beef tenderloin, Boursin cheese spread, arugula, pickled red onion, and balsamic glaze on roasted Yukon gold potato medallions GF"
|
||||
},
|
||||
{
|
||||
"name": "Smoked Trout Dip",
|
||||
"desc": "Local Idaho red trout cream cheese spread, beet-marinated deviled eggs, capers, candied bacon, lemon zest, and everything bagel crisps"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Massaman Curry Lamb Shank",
|
||||
"desc": "Braised spiced lamb shank, carrots, and potatoes in a coconut milk curry over saffron basmati rice GF DF"
|
||||
},
|
||||
{
|
||||
"name": "Miso Red Snapper",
|
||||
"desc": "Charbroiled Atlantic red snapper fillet, sweet miso-tamari glaze, chilled soba noodle salad with edamame, water chestnuts, peppers, onions, and sesame seeds DF"
|
||||
},
|
||||
{
|
||||
"name": "Mushroom Spanakopita",
|
||||
"desc": "Wild mushroom medley, spinach, fresh herbs, and feta baked in puff pastry with roasted carrot and chickpea Greek salad and tzatziki V"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Wasabi Mojito Cheesecake",
|
||||
"desc": "Minty-lime cheesecake with a hint of wasabi, Bardenay rum mojito sauce, whipped cream, and frosted lime zest GF"
|
||||
},
|
||||
{
|
||||
"name": "Elvis Cake",
|
||||
"desc": "Peanut butter mousse in a cookie crust with brûléed banana, caramel, candied bacon, whipped cream, and shaved chocolate GF"
|
||||
},
|
||||
{
|
||||
"name": "Root Beer Float",
|
||||
"desc": "Not Your Father’s Root Beer over house-spun espresso-cinnamon ice cream. Must be 21 to order"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -530,16 +593,49 @@
|
||||
"url": "https://inlanderrestaurantweek.com/project/barkrescuepub/",
|
||||
"menu": {
|
||||
"hours": "Menu served Sun-Thur, 4-9 pm; Fri-Sat, 4-10 pm",
|
||||
"phone": "",
|
||||
"phone": "(509) 418-2551",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Korean Spam Dog",
|
||||
"desc": "Two spam and mozzarella-stuffed Korean dogs served with a side of kimchi slaw and sriracha aioli"
|
||||
},
|
||||
{
|
||||
"name": "Crispy Brussels",
|
||||
"desc": "Crispy Brussels sprouts with parmesan and lemon basil dipping sauce GF V"
|
||||
},
|
||||
{
|
||||
"name": "Thai Mozzarella Egg Rolls",
|
||||
"desc": "Mozzarella and Thai peanut sauce egg rolls served with a sriracha dipping sauce"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Garlic Steak Bites",
|
||||
"desc": "Garlic parmesan steak bites on top of a potato mash and served with a chimichurri sauce GF"
|
||||
},
|
||||
{
|
||||
"name": "Tamale with Rojo Pork",
|
||||
"desc": "Green chili and cheese tamale topped with rojo braised pork GF"
|
||||
},
|
||||
{
|
||||
"name": "Tofu Schnitzel Sandwich",
|
||||
"desc": "Ultra-crispy tofu schnitzel sandwich with garlic, mayo, arugula, tomato and a fried egg on a brioche bun V"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Oreo Icebox Cake",
|
||||
"desc": "Oreo Icebox Cake V+"
|
||||
},
|
||||
{
|
||||
"name": "Apple Compote Hand Pie",
|
||||
"desc": "Apple Compote Hand Pie"
|
||||
},
|
||||
{
|
||||
"name": "Roasted Pineapple Angel Food Cake",
|
||||
"desc": "Angel food cake with brown sugar roasted pineapple and a buttercream glaze"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1257,13 +1353,46 @@
|
||||
"phone": "",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Signature Salad",
|
||||
"desc": "Organic baby spinach, brie, bacon, apple and candied walnuts, dressed with pomegranate vinaigrette"
|
||||
},
|
||||
{
|
||||
"name": "Mac n Cheese",
|
||||
"desc": "Rich four-cheese sauce, stuffed with cream cheese and topped with gruyere cheese sauce V"
|
||||
},
|
||||
{
|
||||
"name": "Ahi Poke",
|
||||
"desc": "Yellow fin tuna, green onion, avocado, crispy tortilla, seaweed and cucumber, topped with tamari sauce"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Halibut Filet",
|
||||
"desc": "Tender halibut in a buerre blanc sauce on parmesan risotto with seasonal vegetables and Asian cucumbers GF"
|
||||
},
|
||||
{
|
||||
"name": "Rack of Lamb",
|
||||
"desc": "Topped with a huckleberry glaze and goat cheese, served with roasted local potatoes and seasonal vegetables GF"
|
||||
},
|
||||
{
|
||||
"name": "Surf n Turf Burger",
|
||||
"desc": "Beef filet topped with butter poached lobster and bearnaise sauce, served on a brioche bun with garlic fries and a lemon aioli"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Blueberry Cobbler",
|
||||
"desc": "Local blueberries, crispy topping with blueberry sauce, whipped cream and vanilla bean ice cream V"
|
||||
},
|
||||
{
|
||||
"name": "Flourless Chocolate Cake",
|
||||
"desc": "Served with a huckleberry glaze and whipped cream GF V"
|
||||
},
|
||||
{
|
||||
"name": "New York Cheesecake",
|
||||
"desc": "Creamy classic cheesecake topped with strawberry sauce and whipped cream"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1573,13 +1702,46 @@
|
||||
"phone": "",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Burrata Naan",
|
||||
"desc": "Burrata cheese, baby arugula, spinach pesto, marinara sauce on our house-made naan GFA V"
|
||||
},
|
||||
{
|
||||
"name": "Whipped Feta and Pita",
|
||||
"desc": "Creamy whipped feta, pickled garlic, sweet and spicy honey drizzle GFA V"
|
||||
},
|
||||
{
|
||||
"name": "Scallops and Arugula Salad",
|
||||
"desc": "Pan-seared scallops, baby arugula, shaved parmesan, pickled shallots, sunflower seeds, microgreens, tossed with a gin vinaigrette GF"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Bourbon Butter Raviolis",
|
||||
"desc": "Bourbon butter, shallots, garlic, white wine, heavy cream, wild mushroom stuffed raviolis garnished with parsley and parmesan cheese V"
|
||||
},
|
||||
{
|
||||
"name": "Cajun Trout",
|
||||
"desc": "Blackened steelhead trout, sautéed spinach, over garlic Yukon gold mashed potatoes"
|
||||
},
|
||||
{
|
||||
"name": "Birria Beef Over Polenta",
|
||||
"desc": "Birria-braised beef in a consommé, chipotle sauce, cojita cheese, cilantro and tomato"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Waffles and Cream",
|
||||
"desc": "Sugar pearl waffles stuffed with French vanilla bean ice cream, salted bourbon caramel"
|
||||
},
|
||||
{
|
||||
"name": "Tiramisu",
|
||||
"desc": "Layers of coffee-soaked ladyfingers, creamy mascarpone, cocoa dusting"
|
||||
},
|
||||
{
|
||||
"name": "Apple and Cherry Crisp",
|
||||
"desc": "Gingered apple, sour cherry, cinnamon crumble"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1598,13 +1760,46 @@
|
||||
"phone": "(509) 863-9501",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Grilled Octopus Salad",
|
||||
"desc": "Fire grilled octopus, fresh spinach, organic heirloom tomato, cucumber, green onion, pepitas, sweet and tangy citrus vinaigrette GF"
|
||||
},
|
||||
{
|
||||
"name": "Acorn Squash Arancini",
|
||||
"desc": "Risotto, acorn squash, herb seasoned bread crumbs, Parmesan Reggiano, smokey tomato jam, green garnish V"
|
||||
},
|
||||
{
|
||||
"name": "Braised Beet and Fugi Apple Salad",
|
||||
"desc": "Crisp Fugi apple, braised red and golden beets, herbed ricotta cheese, horseradish vinaigrette, fresh herbs, candied pecans GF V"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Bison Meatloaf",
|
||||
"desc": "Savory bison, ground and mixed with traditional breadcrumbs, egg and fresh herbs, sweet and spicy mixed berry glaze, whipped then baked Parmesan Duchess potatoes, garlic roasted green beans"
|
||||
},
|
||||
{
|
||||
"name": "Smoked Pork Ribs",
|
||||
"desc": "Smoked then pan-finished tender pork ribs, tangy smokey chipotle barbecue sauce, crispy broiled sunchokes, sweet corn pureé, house-fried chicharrones GF"
|
||||
},
|
||||
{
|
||||
"name": "Crab and Butternut Squash Pasta",
|
||||
"desc": "Lump Alaskan Crab meat, spiraled butternut squash, buttery white wine sauce, smoked artichoke hearts, heirloom cherry tomatoes, Parmesan Reggiano, green onion curls Vegetarian upon request GF V"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Yuzu and Blonde Chocolate Choux Buns",
|
||||
"desc": "French Choux pastry baked with a sugary craquelin top, citrusy yuzu pastry cream, white chocolate caramel mousse rosette V"
|
||||
},
|
||||
{
|
||||
"name": "Pot de Creme au Chocolat",
|
||||
"desc": "Velvety rich chocolate custard, dulce de leche whipped with heavy cream, candied orange GF V"
|
||||
},
|
||||
{
|
||||
"name": "French Chocolate Cheesecake",
|
||||
"desc": "Creamy, chocolatey cheesecake, sweet walnut crust, boozy caramel sauce, Chantilly cream V"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -2088,13 +2283,46 @@
|
||||
"phone": "(509) 315-4613",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Winter Salad",
|
||||
"desc": "citrus. hazelnut. goat cheese."
|
||||
},
|
||||
{
|
||||
"name": "Smoked Steelhead",
|
||||
"desc": "amaranth. roe. almond."
|
||||
},
|
||||
{
|
||||
"name": "Pork Rillettes",
|
||||
"desc": "Gander \u0026 Ryegrass bread. olive. caper and raisin chutney."
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Casarecce",
|
||||
"desc": "pork shoulder ragout. parmesan."
|
||||
},
|
||||
{
|
||||
"name": "Campanelle",
|
||||
"desc": "squash. pork belly. pepitas."
|
||||
},
|
||||
{
|
||||
"name": "Spaghetti",
|
||||
"desc": "red beef sauce. parmesan."
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Pork Tenderloin",
|
||||
"desc": "carrot. chicory. hazelnut."
|
||||
},
|
||||
{
|
||||
"name": "Scallop",
|
||||
"desc": "cauliflower. mushroom. citrus."
|
||||
},
|
||||
{
|
||||
"name": "Shortrib",
|
||||
"desc": "potatoes. kale. root vegetables."
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -2492,28 +2720,44 @@
|
||||
"courses": {
|
||||
"First Course": [
|
||||
{
|
||||
"name": "GF V",
|
||||
"name": "Subudana Pakore",
|
||||
"desc": "Subudana (tapioca), potatoes, cumin, and finely chopped fresh herbs, fried as a pakore (fritter) GF V"
|
||||
},
|
||||
{
|
||||
"name": "GF V",
|
||||
"name": "Moong Dal Chaat",
|
||||
"desc": "Moong dal (green lentil) pakore (fritter), served chaat-style (street food snack) topped with sweetened yogurt and chutneys GF V"
|
||||
},
|
||||
{
|
||||
"name": "GF V",
|
||||
"name": "Singhara Aloo Tikki",
|
||||
"desc": "Water chestnut flour, potatoes, cilantro, green chiles, ginger, fresh herbs and spices. Crispy on the outside and deliciously soft inside GF V"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
{
|
||||
"name": "GF DFA",
|
||||
"name": "Chicken Methi Mali",
|
||||
"desc": "Chicken and fenugreek leaves cooked with Indian aromas. Freshly chopped ginger, garlic and fresh yogurt make a thick, creamy sauce. Served with rice and naan GF V+A"
|
||||
},
|
||||
{
|
||||
"name": "Saag with Goat",
|
||||
"desc": "Goat marinated with yogurt, fresh herbs and Indian aromas. Cooked in creamy saag (spinach sauce), served with rice and naan GF DFA"
|
||||
},
|
||||
{
|
||||
"name": "Methi Matter Mali",
|
||||
"desc": "Methi (fenugreek) and green peas cooked with fragrant and creamy gravy with spices. Served with rice and naan GF – Coconut milk option / vegetarian option"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
{
|
||||
"name": "GF V+",
|
||||
"name": "Carrot Cake Halwa",
|
||||
"desc": "Fresh carrots roasted in butter, pistachios, cashews, golden raisins, and plenty of ground cardamom"
|
||||
},
|
||||
{
|
||||
"name": "Paan Ice Cream",
|
||||
"desc": "This refreshing paan (betel leaves) ice cream has all the flavors: rose petal jam, candied fennel, dates, and coconut GF V+"
|
||||
},
|
||||
{
|
||||
"name": "Gulabjamun with Rabdi",
|
||||
"desc": "Indian donuts served with delicious, creamy, and rich rabdi (pistachio and cashew sauce) V"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -2766,28 +3010,44 @@
|
||||
"courses": {
|
||||
"First Course": [
|
||||
{
|
||||
"name": "GF V",
|
||||
"name": "Subudana Pakore",
|
||||
"desc": "Subudana (tapioca), potatoes, cumin, and finely chopped fresh herbs, fried as a pakore (fritter) GF V"
|
||||
},
|
||||
{
|
||||
"name": "GF V",
|
||||
"name": "Moong Dal Chaat",
|
||||
"desc": "Moong dal (green lentil) pakore (fritter), served chaat-style (street food snack) topped with sweetened yogurt and chutneys GF V"
|
||||
},
|
||||
{
|
||||
"name": "GF V",
|
||||
"name": "Singhara Aloo Tikki",
|
||||
"desc": "Water chestnut flour, potatoes, cilantro, green chiles, ginger, fresh herbs and spices. Crispy on the outside and deliciously soft inside GF V"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
{
|
||||
"name": "GF DFA",
|
||||
"name": "Chicken Methi Mali",
|
||||
"desc": "Chicken and fenugreek leaves cooked with Indian aromas. Freshly chopped ginger, garlic and fresh yogurt make a thick, creamy sauce. Served with rice and naan GF V+A"
|
||||
},
|
||||
{
|
||||
"name": "Saag with Goat",
|
||||
"desc": "Goat marinated with yogurt, fresh herbs and Indian aromas. Cooked in creamy saag (spinach sauce), served with rice and naan GF DFA"
|
||||
},
|
||||
{
|
||||
"name": "Methi Matter Mali",
|
||||
"desc": "Methi (fenugreek) and green peas cooked with fragrant and creamy gravy with spices. Served with rice and naan GF – Coconut milk option / vegetarian option"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
{
|
||||
"name": "GF V+",
|
||||
"name": "Carrot Cake Halwa",
|
||||
"desc": "Fresh carrots roasted in butter, pistachios, cashews, golden raisins, and plenty of ground cardamom"
|
||||
},
|
||||
{
|
||||
"name": "Paan Ice Cream",
|
||||
"desc": "This refreshing paan (betel leaves) ice cream has all the flavors: rose petal jam, candied fennel, dates, and coconut GF V+"
|
||||
},
|
||||
{
|
||||
"name": "Gulabjamun with Rabdi",
|
||||
"desc": "Indian donuts served with delicious, creamy, and rich rabdi (pistachio and cashew sauce) V"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -3007,31 +3267,43 @@
|
||||
"courses": {
|
||||
"First Course": [
|
||||
{
|
||||
"name": "GFA V V+",
|
||||
"name": "Hummus Dip with Pita",
|
||||
"desc": "Slow-cooked garbanzo beans blended with tahini, lemon and garlic, topped with extra virgin olive oil and served with pita GFA V V+ – Add beef shawarma meat $14"
|
||||
},
|
||||
{
|
||||
"name": "GFA V",
|
||||
"name": "Tzatziki Dip with Pita",
|
||||
"desc": "Fresh Greek yogurt, cucumber, fresh dill, mint and garlic, served with pita GFA V – Add lamb kofta skewer $9"
|
||||
},
|
||||
{
|
||||
"name": "GF V V+",
|
||||
"name": "Vegetarian Grape Leaves with Tzatziki",
|
||||
"desc": "Six handmade rolls stuffed with seasoned rice and vegetables, slow-cooked in tomato broth and olive oil, served with tzatziki sauce GF V V+ – Add gyro meat $12"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
{
|
||||
"name": "GF V V+",
|
||||
"name": "Chicken Shish Kebab Platter",
|
||||
"desc": "Two skewers of marinated tenderloin chunks grilled on an open flame, served over saffron turmeric basmati rice with a side of Mediterranean salad and garlic sauce GF – Upgrade side to tabouleh $6"
|
||||
},
|
||||
{
|
||||
"name": "Falafel Dinner Platter",
|
||||
"desc": "Deep fried falafel patties served over saffron turmeric basmati rice with a side of Mediterranean salad and our tahini sauce GF V V+ – Upgrade side to lentil soup $8"
|
||||
},
|
||||
{
|
||||
"name": "Gyros Greek Platter",
|
||||
"desc": "Grilled beef and lamb strips served over saffron turmeric basmati rice with a side of Mediterranean salad and our tahini sauce DF – Upgrade side to Greek salad $6"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
{
|
||||
"name": "GF V",
|
||||
"name": "Lebanese Rice Pudding",
|
||||
"desc": "Creamy rice pudding flavored with orange blossom and rose water GF V – Add ice cream scoop $5"
|
||||
},
|
||||
{
|
||||
"name": "GF V V+",
|
||||
"name": "Namoura (Semolina Cake)",
|
||||
"desc": "Popular classic Middle Eastern dessert made with semolina flour and topped with a sweet sugar syrup – Add nuts and honey $5"
|
||||
},
|
||||
{
|
||||
"name": "Halva",
|
||||
"desc": "A Middle Eastern treat made from tahini GF V V+ – Add pita $2"
|
||||
}
|
||||
]
|
||||
@@ -3628,13 +3900,46 @@
|
||||
"phone": "",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "EFESTĒ Feral Sauvignon Blanc",
|
||||
"desc": "White peach, lime leaf, flint"
|
||||
},
|
||||
{
|
||||
"name": "No-Li Porch Glow Amber",
|
||||
"desc": "Crisp and refreshing with a hint of chocolate malt"
|
||||
},
|
||||
{
|
||||
"name": "Maple New Fashioned",
|
||||
"desc": "Browne Family whiskey, barrel-aged maple syrup, Amarena cherry juice, Peychaud’s bitters"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Double Smash Burger",
|
||||
"desc": "Two quarter-pound Prime beef patties, American cheese, caramelized onion, aioli and Thousand Island on a brioche bun, served with french fries"
|
||||
},
|
||||
{
|
||||
"name": "8 oz. New York",
|
||||
"desc": "Russet purée, seasonal vegetable, brandy peppercorn demi-glace"
|
||||
},
|
||||
{
|
||||
"name": "Pan-Seared Salmon",
|
||||
"desc": "Champagne vinaigrette orzo, Brussels sprouts, bacon, mustard beurre blanc"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Crème Brûlée",
|
||||
"desc": "Baked coconut vanilla custard, caramelized sugar"
|
||||
},
|
||||
{
|
||||
"name": "Cranberry Orange Chiffon",
|
||||
"desc": "White chocolate chiffon cake, orange white chocolate mousse, cranberry gelée"
|
||||
},
|
||||
{
|
||||
"name": "Strawberry Rhubarb Sorbet",
|
||||
"desc": "Fresh berries, mint"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -3944,13 +4249,46 @@
|
||||
"phone": "(509) 323-2578",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Burrata Caprese",
|
||||
"desc": "Heirloom tomato, red pesto, basil, grilled baguette V"
|
||||
},
|
||||
{
|
||||
"name": "Roasted Beet Salad",
|
||||
"desc": "Roasted beets and watercress, pancetta, chevre, lemon vinaigrette GF"
|
||||
},
|
||||
{
|
||||
"name": "Baby Kale Caesar",
|
||||
"desc": "Baby kale, garlic crouton, aged Parmesan Add chicken $5 or shrimp $8"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Risotto with Roasted Butternut Squash",
|
||||
"desc": "Risotto, roasted butternut squash, sage, smoked gouda and mascarpone"
|
||||
},
|
||||
{
|
||||
"name": "Carleton Farms Pork Loin",
|
||||
"desc": "Pork loin brined and lightly smoked, baby kale, garlic chips, gruyere mashed potato, honey-apricot gastrique"
|
||||
},
|
||||
{
|
||||
"name": "Flatiron Steak Frites Wild Mushrooms",
|
||||
"desc": "Flatiron steak frites wild mushrooms, roasted leeks, 10-year aged balsamic, truffle fries"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Peach Bread Pudding",
|
||||
"desc": "Fig jam, vanilla crème anglaise"
|
||||
},
|
||||
{
|
||||
"name": "Coconut Panna Cotta",
|
||||
"desc": "Coconut panna cotta, raspberries, lemon curd, Chantilly cream"
|
||||
},
|
||||
{
|
||||
"name": "Crème Brulee",
|
||||
"desc": ""
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -4085,15 +4423,45 @@
|
||||
"phone": "(509) 934-1979",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Steak Bites",
|
||||
"desc": "Steak bites marinated in house-made signature steak sauce. Served with cheesy garlic bread"
|
||||
},
|
||||
{
|
||||
"name": "Caesar Salad",
|
||||
"desc": "Romaine, house-made Caesar dressing, croutons and grated parmesan cheese GFA – Add chicken $6"
|
||||
},
|
||||
{
|
||||
"name": "Garlic Cheese Curds",
|
||||
"desc": "Garlic breaded Wisconsin white cheddar cheese curds with Ponderosa boom-boom dipping sauce V"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Chicken Fried Steak",
|
||||
"desc": "12 oz. chicken fried steak served with vegetable medley and choice of potatoes"
|
||||
},
|
||||
{
|
||||
"name": "Bleu Cheese \u0026 Pecan Salmon",
|
||||
"desc": "Pecan and bleu cheese crusted grilled salmon. Served with white rice and vegetable medley GFA"
|
||||
},
|
||||
{
|
||||
"name": "Bourbon Chicken",
|
||||
"desc": "Grilled chicken breast topped with mushroom and onion bourbon sauce. Served with mashed potatoes and vegetable medley"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
{
|
||||
"name": "GFA V",
|
||||
"name": "Brownie Skillet",
|
||||
"desc": "Warm brownie in a cast iron skillet topped with vanilla ice cream V"
|
||||
},
|
||||
{
|
||||
"name": "Creme Brulee",
|
||||
"desc": "House-made creme brulee GFA V"
|
||||
},
|
||||
{
|
||||
"name": "Lemon Cookie Sandwich",
|
||||
"desc": "Lemon and white chocolate chip cookie filled with vanilla ice cream and house-made bourbon caramel sauce V"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -4735,13 +5103,46 @@
|
||||
"phone": "(509) 326-7251",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Celtic Caesar Salad",
|
||||
"desc": "Chopped romaine, baby kale, shaved Parmesan, house-made croutons"
|
||||
},
|
||||
{
|
||||
"name": "Donegal Bay Clam Chowder",
|
||||
"desc": "Creamy clam chowder, chopped red pepper, cabbage, onion, potato"
|
||||
},
|
||||
{
|
||||
"name": "Boxty Cakes",
|
||||
"desc": "Two breaded potato cakes with corned beef and Dubliner cheese, deep fried"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Corned Beef and Cabbage",
|
||||
"desc": "Our signature dish! Slow-cooked, tender corned beef, braised cabbage, colcannon potatoes, creamy horseradish"
|
||||
},
|
||||
{
|
||||
"name": "Guinness Beef Stew",
|
||||
"desc": "Guinness-braised Kobe beef cubes, rustic-cut carrots, parsnips, celery, pearl onions with Colcannon-style mashed potatoes on top"
|
||||
},
|
||||
{
|
||||
"name": "Fish and Chips",
|
||||
"desc": "Two pieces of wild-caught, sustainable Pacific Cod, hand-breaded in crispy panko and seasonings. Comes with French fries and scratch-made tartar sauce"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Irish Bread Pudding",
|
||||
"desc": "Scratch-made and topped with a buttered rum sauce and currants"
|
||||
},
|
||||
{
|
||||
"name": "Bailey’s Creme Brulee",
|
||||
"desc": "Bailey’s custard with a caramelized sugar topping and a shortbread shamrock cookie"
|
||||
},
|
||||
{
|
||||
"name": "Danny Boy’s Chocolate Brownie",
|
||||
"desc": "Scratch-made chocolate brownie topped with candied pecans and Andes mint crumbles"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -5343,13 +5744,46 @@
|
||||
"phone": "(509) 598-4300",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Burrata Cheese and Local Beets",
|
||||
"desc": "Arugula, heirloom tomatoes, aged balsamic reduction GF, V"
|
||||
},
|
||||
{
|
||||
"name": "Grapefruit Salad",
|
||||
"desc": "Bibb lettuce, goat cheese, avocado, mandarin oranges, mint-lime vinaigrette GF, V"
|
||||
},
|
||||
{
|
||||
"name": "Smoky Butternut Squash Bisque",
|
||||
"desc": "Chipotle pepitas, cilantro GF, V"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Grilled Bone-In Pork Chop",
|
||||
"desc": "Smoked apple puree, mashed potatoes, local vegetables GF"
|
||||
},
|
||||
{
|
||||
"name": "Blueberry Duck Breast",
|
||||
"desc": "Blueberry gastrique, fingerling potatoes, local vegetables GF"
|
||||
},
|
||||
{
|
||||
"name": "Pan Seared Sea Scallops",
|
||||
"desc": "Lemon-tarragon risotto, brown butter, local vegetables GF"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Rolo Dome",
|
||||
"desc": "Chocolate mousse, salted caramel GF, V"
|
||||
},
|
||||
{
|
||||
"name": "Blood Orange Cheesecake",
|
||||
"desc": "Vanilla cheesecake, blood orange gelee V"
|
||||
},
|
||||
{
|
||||
"name": "Crème Brûlée",
|
||||
"desc": "Housemade vanilla custard GF, V"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -5415,13 +5849,46 @@
|
||||
"phone": "(208) 758-0111",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Tequila Clams",
|
||||
"desc": "Reposado tequila, Argentinian chorizo, clams, herbs, lime, grilled sourdough GFA Double portion size \u0026#8211; $8"
|
||||
},
|
||||
{
|
||||
"name": "Half Salad",
|
||||
"desc": "Half size portion of any salad: Quinoa Greens V+ , Jicama Citrus V , Warmed Spinach, Roasted Beet V , Cezar GFA Add your choice of protein \u0026#8211; $6, Full size salad \u0026#8211; $6"
|
||||
},
|
||||
{
|
||||
"name": "Elote Cakes",
|
||||
"desc": "Roasted corn, masa, queso fresco, cilantro, crema, pickled Fresno chilis GF V"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Churrasco Steak",
|
||||
"desc": "Argentinian-style grilled skirt steak, chimichurri, mashed sweet potatoes with coconut milk GF Add grilled shrimp \u0026#8211; $8"
|
||||
},
|
||||
{
|
||||
"name": "Arroz con Gandules",
|
||||
"desc": "Honduran-style rice dish with pork, pigeon peas, vegetables and spices GF"
|
||||
},
|
||||
{
|
||||
"name": "Chicken or Vegetable Tamale",
|
||||
"desc": "Slow cooked pulled chicken or spiced roasted vegetable blend (V), stuffed inside our banana leaf-wrapped Oaxacan style tamales. Gallo pinto and dressed jicama slaw GF Add a second Tamale \u0026#8211; $10"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Arroz con Leche Flan",
|
||||
"desc": "Latin-style spiced rice pudding composed within a velvety custard of a flan GF V"
|
||||
},
|
||||
{
|
||||
"name": "Chocolate Torte",
|
||||
"desc": "Flourless chocolate torte, pink peppercorn goat’s milk panna cotta, burnt candied orange peel, masa crumble GF V"
|
||||
},
|
||||
{
|
||||
"name": "Alfajores y Cafe",
|
||||
"desc": "Latin shortbread cookies served with our signature coffee blend from Coeur d’Alene Coffee Company V Add rum (to your coffee) $8"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -6108,13 +6575,46 @@
|
||||
"phone": "",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Boudin Balls",
|
||||
"desc": "Louisiana’s version of a snacking sausage. Served with a jalapeño remoulade"
|
||||
},
|
||||
{
|
||||
"name": "Whipped Honey Cornbread",
|
||||
"desc": "Cast-iron seared, whipped honey butter, scallions V"
|
||||
},
|
||||
{
|
||||
"name": "Louisiana Garlic Soup",
|
||||
"desc": "Creamy roasted garlic soup. A Louisiana tradition!"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Duck Gumbo",
|
||||
"desc": "Duck and Andouille sausage gumbo"
|
||||
},
|
||||
{
|
||||
"name": "Trout Meuniere",
|
||||
"desc": "Steelhead trout with a brown butter sauce"
|
||||
},
|
||||
{
|
||||
"name": "Pork Grillades",
|
||||
"desc": "Slow-roasted pork shoulder served with our stone-ground cheesy grits GF"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "King Cake",
|
||||
"desc": "Who will find the baby?! V"
|
||||
},
|
||||
{
|
||||
"name": "Beignets",
|
||||
"desc": "A traditional French doughnut V \u0026#8211; Make it an affogato! $7"
|
||||
},
|
||||
{
|
||||
"name": "Pear \u0026 Almond Tart",
|
||||
"desc": "Almond frangipane pastry with poached pear topping V"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -6193,13 +6693,46 @@
|
||||
"phone": "",
|
||||
"courses": {
|
||||
"First Course": [
|
||||
|
||||
{
|
||||
"name": "Roasted Beet Salad",
|
||||
"desc": "Organic spring greens tossed in a balsamic white truffle vinaigrette, topped with roasted beets, toasted hazelnuts and chèvre GF V – Vegan available by request"
|
||||
},
|
||||
{
|
||||
"name": "Jalapeño Cheddar Chicken Soup",
|
||||
"desc": "Chicken stock base, jalapeños, sharp cheddar cheese, shredded chicken, fresh herbs, and a touch of cream. A must try! GF"
|
||||
},
|
||||
{
|
||||
"name": "Classic Caesar Salad",
|
||||
"desc": "Crisp romaine hearts tossed with a classic creamy Caesar dressing, house croutons and Parmigiano-Reggiano cheese. Served with lemon GFA"
|
||||
}
|
||||
],
|
||||
"Second Course": [
|
||||
|
||||
{
|
||||
"name": "Yellow Curry Prawns or Tofu",
|
||||
"desc": "House-made yellow curry with just the right amount of kick, with your choice of tofu or prawns. Served with a coconut rice cake, sweet peas, heirloom carrots, roasted red peppers, and micro pea shoots GF – Vegetarian and vegan option available"
|
||||
},
|
||||
{
|
||||
"name": "Braised Beef Short Ribs",
|
||||
"desc": "Boneless beef short ribs roasted for 16 hours with rosemary and thyme, served with a green peppercorn red wine pan sauce and “everything” Yukon potatoes GF"
|
||||
},
|
||||
{
|
||||
"name": "Hunter’s Chicken",
|
||||
"desc": "Chicken leg and thigh quarter, cured and braised until falling off the bone served with a rich hunter’s sauce full of vegetables, herbs and chicken stock and “everything” Yukon potatoes GF"
|
||||
}
|
||||
],
|
||||
"Third Course": [
|
||||
|
||||
{
|
||||
"name": "Mini Margarita Pie",
|
||||
"desc": "Frozen key lime pie with graham cracker crust. Special ingredient: tequila! Topped with whipped cream and red sea salt V"
|
||||
},
|
||||
{
|
||||
"name": "Wiley’s Bourbon Creme Brûlée",
|
||||
"desc": "Delicious brûléed custard with vanilla bean and orange peel topped with bourbon-nutmeg caramel and whipped cream GF V"
|
||||
},
|
||||
{
|
||||
"name": "Chocolate Pot de Creme",
|
||||
"desc": "A decadent dessert featuring coconut cream, chocolate and a hint of spice. Topped with raspberry puree and candied pecan GF V – Vegan available by request"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
121
fix-2025.ps1
121
fix-2025.ps1
@@ -1,121 +0,0 @@
|
||||
# fix-2025.ps1 - Post-process the scraped 2025 restaurant JSON
|
||||
# Fixes: HTML entities in names/descs, wrong prices, re-fetches 0-course restaurants
|
||||
|
||||
$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
||||
$jsonPath = Join-Path $projectDir '2025-restaurants.json'
|
||||
|
||||
$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json
|
||||
|
||||
function Decode-Html($str) {
    <#
    .SYNOPSIS
        Decodes HTML entities in scraped text and collapses whitespace.
    .PARAMETER str
        Raw text that may contain entities such as &amp;, &#039;, &quot; or &nbsp;.
    .OUTPUTS
        The decoded, trimmed string. Null or empty input is returned unchanged.
    #>
    if (-not $str) { return $str }

    # WebUtility decodes every named and numeric entity in a single pass, so an
    # escaped ampersand such as "&amp;lt;" becomes "&lt;" instead of being
    # decoded twice into "<".
    $decoded = [System.Net.WebUtility]::HtmlDecode([string]$str)

    # \s also matches the no-break space produced by &nbsp;, so any run of
    # whitespace collapses to one ordinary space before trimming.
    ($decoded -replace '\s+', ' ').Trim()
}
|
||||
|
||||
# ---- Report issues ----
|
||||
Write-Host "=== Data Quality Report ==="
|
||||
Write-Host "Total restaurants: $($data.Count)"
|
||||
Write-Host ""
|
||||
|
||||
Write-Host "Wrong prices (not 25/35/45):"
|
||||
$data | Where-Object { $_.price -notin @(25,35,45) } | ForEach-Object {
|
||||
Write-Host " $($_.slug): price=$($_.price)"
|
||||
}
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "Zero-course restaurants (all 3 empty):"
|
||||
$data | Where-Object {
|
||||
$_.menu.courses.'First Course'.Count -eq 0 -and
|
||||
$_.menu.courses.'Second Course'.Count -eq 0 -and
|
||||
$_.menu.courses.'Third Course'.Count -eq 0
|
||||
} | ForEach-Object { Write-Host " $($_.slug) [price=$($_.price)] name=$($_.name)" }
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "Partial courses (any course != 3):"
|
||||
$data | Where-Object {
|
||||
$_.menu.courses.'First Course'.Count -ne 3 -or
|
||||
$_.menu.courses.'Second Course'.Count -ne 3 -or
|
||||
$_.menu.courses.'Third Course'.Count -ne 3
|
||||
} | Where-Object {
|
||||
# Exclude totally empty ones (already reported above)
|
||||
-not (
|
||||
$_.menu.courses.'First Course'.Count -eq 0 -and
|
||||
$_.menu.courses.'Second Course'.Count -eq 0 -and
|
||||
$_.menu.courses.'Third Course'.Count -eq 0
|
||||
)
|
||||
} | ForEach-Object {
|
||||
$c1 = $_.menu.courses.'First Course'.Count
|
||||
$c2 = $_.menu.courses.'Second Course'.Count
|
||||
$c3 = $_.menu.courses.'Third Course'.Count
|
||||
Write-Host " $($_.slug): $c1/$c2/$c3"
|
||||
}
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "=== Applying Fixes ==="
|
||||
|
||||
# ---- Fix HTML entities in all string fields ----
|
||||
foreach ($r in $data) {
|
||||
$r.name = Decode-Html $r.name
|
||||
$r.cuisine = Decode-Html $r.cuisine
|
||||
|
||||
foreach ($course in @('First Course', 'Second Course', 'Third Course')) {
|
||||
$items = $r.menu.courses.$course
|
||||
if ($items) {
|
||||
foreach ($item in $items) {
|
||||
$item.name = Decode-Html $item.name
|
||||
$item.desc = Decode-Html $item.desc
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# ---- Fix wrong prices using the Wayback Machine price page ----
# The price page listed restaurants under $25, $35, $45 sections.
# We'll re-fetch pages for wrong-price restaurants using a tighter regex.

# Wrap the pipeline in @() so .Count is always available: in Windows PowerShell 5.1
# a single PSCustomObject returned by Where-Object has no Count property, which
# would make the check below skip the fix when exactly one price is wrong.
$wrongPrice = @($data | Where-Object { $_.price -notin @(25,35,45) })
if ($wrongPrice.Count -gt 0) {
    Write-Host "Re-fetching $($wrongPrice.Count) restaurants with wrong prices..."

    foreach ($r in $wrongPrice) {
        Write-Host " $($r.slug)..." -NoNewline
        try {
            $url = "https://web.archive.org/web/20250306132630/https://inlanderrestaurantweek.com/project/$($r.slug)/"
            $resp = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 60 -ErrorAction Stop
            $html = $resp.Content

            # Look specifically for h1 containing a 2-digit price at a tier
            $priceM = [regex]::Match($html, '<h1[^>]*>.*?<strong>\$(25|35|45)</strong>', [System.Text.RegularExpressions.RegexOptions]::Singleline)
            if ($priceM.Success) {
                $r.price = [int]$priceM.Groups[1].Value
                Write-Host " fixed to $($r.price)"
            } else {
                # Try all strong dollar values and pick first that's 25, 35, or 45
                $allPrices = [regex]::Matches($html, '<strong>\$(\d+)</strong>')
                $validPrice = $allPrices | Where-Object { $_.Groups[1].Value -in @('25','35','45') } | Select-Object -First 1
                if ($validPrice) {
                    $r.price = [int]$validPrice.Groups[1].Value
                    Write-Host " fixed to $($r.price)"
                } else {
                    Write-Host " could not determine - left at $($r.price)"
                }
            }
        } catch {
            # A failed fetch leaves the existing price untouched and moves on.
            Write-Host " FETCH ERROR: $_"
        }
        # Brief pause between requests to the archive.
        Start-Sleep -Milliseconds 300
    }
}
|
||||
|
||||
# ---- Save fixed JSON ----
|
||||
$json = $data | ConvertTo-Json -Depth 10
|
||||
[System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8)
|
||||
Write-Host ""
|
||||
Write-Host "Saved fixed JSON to $jsonPath"
|
||||
123
fix-tavolata.ps1
Normal file
123
fix-tavolata.ps1
Normal file
@@ -0,0 +1,123 @@
|
||||
# fix-tavolata.ps1
|
||||
# Run this after the Wayback Machine rate limit resets (wait ~30 minutes after last run)
|
||||
# Recovers tavolata's Third Course using the same-block parser strategy
|
||||
|
||||
$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
||||
$jsonPath = Join-Path $projectDir '2025-restaurants.json'
|
||||
$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json
|
||||
|
||||
function Decode-Html($str) {
    <#
    .SYNOPSIS
        Decodes HTML entities in scraped text and collapses whitespace.
    .PARAMETER str
        Raw text that may contain entities such as &amp;, &#039;, &quot; or &nbsp;.
    .OUTPUTS
        The decoded, trimmed string. Null or empty input is returned unchanged.
    #>
    if (-not $str) { return $str }

    # WebUtility decodes every named and numeric entity in a single pass, so an
    # escaped ampersand such as "&amp;lt;" becomes "&lt;" instead of being
    # decoded twice into "<".
    $decoded = [System.Net.WebUtility]::HtmlDecode([string]$str)

    # \s also matches the no-break space produced by &nbsp;, so any run of
    # whitespace collapses to one ordinary space before trimming.
    ($decoded -replace '\s+', ' ').Trim()
}
|
||||
function Get-CleanText($rawHtml) {
    # Turn an HTML fragment into plain text: every tag is replaced by a single
    # space, then Decode-Html finishes the cleanup of the remaining text.
    $withoutTags = $rawHtml -replace '<[^>]+>', ' '
    return (Decode-Html $withoutTags)
}
|
||||
function Test-DietaryTag($str) {
    # True when the whole string is one of the known non-dish labels
    # (dietary markers, the year marker, or the "Drink" label).
    $tagPattern = '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$'
    return ($str -match $tagPattern)
}
|
||||
|
||||
function Get-Dish($pContent) {
    # Extracts one dish from the inner HTML of a <p> element.
    # Three markup styles are tried in order; the result is a PSCustomObject
    # with 'name' and 'desc' properties, or $null when no usable name is found.
    $singleLine = [System.Text.RegularExpressions.RegexOptions]::Singleline

    # Style "<b>Name<br/>...": the name is the text between <b> and the first <br>.
    $boldBreak = [regex]::Match($pContent, '(?s)<b>(.*?)<br\s*/?>', $singleLine)
    if ($boldBreak.Success) {
        $candidate = Get-CleanText $boldBreak.Groups[1].Value
        $lengthOk = ($candidate.Length -ge 3) -and ($candidate.Length -le 80)
        if ($lengthOk -and -not (Test-DietaryTag $candidate) -and $candidate -notmatch '^[A-Z]{1,3}:') {
            # Everything after the matched <br> is the description.
            $remainder = $pContent.Substring($boldBreak.Index + $boldBreak.Length)
            return [PSCustomObject]@{ name = $candidate; desc = Get-CleanText $remainder }
        }
    }

    # Style "<b>Part1</b><strong>Part2</strong>": the name may be split over two tags.
    $boldOnly = [regex]::Match($pContent, '(?s)<b>(.*?)</b>', $singleLine)
    if ($boldOnly.Success) {
        $firstPart = Get-CleanText $boldOnly.Groups[1].Value
        if ($firstPart.Length -ge 3 -and -not (Test-DietaryTag $firstPart)) {
            $tail = $pContent.Substring($boldOnly.Index + $boldOnly.Length)

            # The <strong> must follow </b> with only non-tag text in between ([^<]*).
            $strongNext = [regex]::Match($tail, '(?s)^[^<]*<strong>(.*?)</strong>(.*)', $singleLine)
            if ($strongNext.Success) {
                $secondPart = Get-CleanText $strongNext.Groups[1].Value
                if ($secondPart.Length -ge 2 -and -not (Test-DietaryTag $secondPart)) {
                    return [PSCustomObject]@{
                        name = "$firstPart $secondPart".Trim()
                        desc = Get-CleanText $strongNext.Groups[2].Value
                    }
                }
            }

            # No usable second part: the <b> text alone is the name.
            return [PSCustomObject]@{ name = $firstPart; desc = Get-CleanText $tail }
        }
    }

    # Style "<strong>Name</strong><br/>Description".
    $strongOnly = [regex]::Match($pContent, '(?s)<strong>(.*?)</strong>', $singleLine)
    if (-not $strongOnly.Success) { return $null }

    $candidate = Get-CleanText $strongOnly.Groups[1].Value
    $rejected = $candidate.Length -lt 3 -or $candidate.Length -gt 80 -or (Test-DietaryTag $candidate) -or $candidate -match '^[A-Z]{1,3}:'
    if ($rejected) { return $null }

    # Prefer the text after the first <br>; otherwise take what follows </strong>.
    $descHtml = ''
    if ($pContent -match '(?s)<br\s*/?>(.*?)$') {
        $descHtml = $matches[1]
    }
    else {
        $afterStrong = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $singleLine)
        if ($afterStrong.Success) { $descHtml = $afterStrong.Groups[1].Value }
    }

    return [PSCustomObject]@{ name = $candidate; desc = Get-CleanText $descHtml }
}
|
||||
|
||||
function Get-Dishes($courseHtml) {
    # Collects the dishes in a course's HTML: each <p> element containing a
    # <b> or <strong> tag is passed to Get-Dish, and results that carry a name
    # are kept. Returns an ArrayList, which may be empty.
    $singleLine = [System.Text.RegularExpressions.RegexOptions]::Singleline
    $found = [System.Collections.ArrayList]@()

    $paragraphs = [regex]::Matches($courseHtml, '(?s)<p[^>]*>(.*?)</p>', $singleLine)
    foreach ($paragraph in $paragraphs) {
        $inner = $paragraph.Groups[1].Value
        if ($inner -match '<b>|<strong>') {
            $dish = Get-Dish $inner
            if ($dish -and $dish.name) { [void]$found.Add($dish) }
        }
    }

    # The leading comma stops PowerShell from unrolling the list on return.
    return ,$found
}
|
||||
|
||||
function Get-CourseBlock($html, $label, $nextLabel) {
    # Returns the HTML that belongs to the course headed by $label, or '' when
    # nothing is found. Strategies, in order:
    #   1. everything between $label and $nextLabel (only when $nextLabel is given);
    #   2. the <p> elements that follow the closing heading tag, up to the next </div>;
    #   3. the next et_pb_text_inner block that does not start with a heading.
    $singleLine = [System.Text.RegularExpressions.RegexOptions]::Singleline

    if ($nextLabel) {
        $betweenPattern = '{0}(.+?)(?={1})' -f [regex]::Escape($label), [regex]::Escape($nextLabel)
        $between = [regex]::Match($html, $betweenPattern, $singleLine)
        if ($between.Success) { return $between.Groups[1].Value }
    }

    $start = $html.IndexOf($label)
    if ($start -lt 0) { return '' }

    # Only the first 8000 characters from the label onward are searched.
    $windowLength = [Math]::Min(8000, $html.Length - $start)
    $window = $html.Substring($start, $windowLength)

    $sameBlock = [regex]::Match($window, '(?s)</h[123]>\s*(<p.+?)(?=</div>)', $singleLine)
    if ($sameBlock.Success -and $sameBlock.Groups[1].Value -match '<p') {
        return $sameBlock.Groups[1].Value
    }

    $nextBlock = [regex]::Match($window, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $singleLine)
    if ($nextBlock.Success) { return $nextBlock.Groups[1].Value }

    return ''
}
|
||||
|
||||
# ---- Recover tavolata's courses from archived snapshots ----
# Tries each Wayback Machine timestamp in turn until one yields a non-empty
# Third Course, then writes the updated data back to the JSON file.
$r = $data | Where-Object { $_.slug -eq 'tavolata' }
if (-not $r) {
    # Without this guard every attempt below would fail on a null entry.
    Write-Host "No 'tavolata' entry found in $jsonPath - nothing to fix." -ForegroundColor Yellow
    return
}
Write-Host "tavolata currently: $($r.menu.courses.'First Course'.Count)/$($r.menu.courses.'Second Course'.Count)/$($r.menu.courses.'Third Course'.Count)"

$timestamps = @('20250306132630','20250401000000','20250415000000','20250501000000')
$success = $false

for ($i = 0; ($i -lt $timestamps.Count) -and (-not $success); $i++) {
    $ts = $timestamps[$i]
    Write-Host "Trying timestamp $ts..." -NoNewline
    try {
        $url = "https://web.archive.org/web/$ts/https://inlanderrestaurantweek.com/project/tavolata/"
        $resp = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 60 -ErrorAction Stop
        $html = $resp.Content
        if ($html -match '429 Too Many') { throw "Rate limited" }

        $first = Get-Dishes (Get-CourseBlock $html 'First Course' 'Second Course')
        $second = Get-Dishes (Get-CourseBlock $html 'Second Course' 'Third Course')
        $third = Get-Dishes (Get-CourseBlock $html 'Third Course' $null)

        Write-Host " -> $($first.Count)/$($second.Count)/$($third.Count)"

        if ($third.Count -gt 0) {
            # First and Second Course are only overwritten when this snapshot
            # actually produced dishes for them; existing data is kept otherwise.
            if ($first.Count -gt 0) { $r.menu.courses.'First Course' = @($first) }
            if ($second.Count -gt 0) { $r.menu.courses.'Second Course' = @($second) }
            $r.menu.courses.'Third Course' = @($third)
            Write-Host "SUCCESS! tavolata Third Course recovered." -ForegroundColor Green
            $success = $true
        } else {
            Write-Host " Third Course still empty, trying next timestamp..."
        }
    } catch {
        Write-Host " ERROR: $_" -ForegroundColor Red
    }

    # Pause only when another request will follow; sleeping after a success or
    # after the final attempt just delays the script.
    if ((-not $success) -and ($i -lt ($timestamps.Count - 1))) {
        Start-Sleep -Seconds 10
    }
}

if (-not $success) {
    Write-Host "Could not recover tavolata Third Course. Try again later." -ForegroundColor Yellow
} else {
    $json = $data | ConvertTo-Json -Depth 10
    [System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8)
    Write-Host "Saved to $jsonPath"
}
|
||||
130
fix2-2025.ps1
130
fix2-2025.ps1
@@ -1,130 +0,0 @@
|
||||
# fix2-2025.ps1 - Comprehensive fix for 2025 restaurant JSON
|
||||
# 1. Fix all prices using authoritative data from price listing page
|
||||
# 2. Fix HTML entities in all text fields
|
||||
# 3. Report remaining issues
|
||||
|
||||
$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
||||
$jsonPath = Join-Path $projectDir '2025-restaurants.json'
|
||||
|
||||
# Load JSON
|
||||
$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json
|
||||
|
||||
# ---- Authoritative price map from price listing page ----
|
||||
$authPrices = @{
|
||||
"1898"="45"; "24taps"="25"; "315cuisine"="45"; "ambrosia"="45";
|
||||
"anthonys"="45"; "arrowhead"="25"; "baba"="45"; "backyardpublichouse"="35";
|
||||
"bangkokthai"="35"; "bardenay"="45"; "barkrescuepub"="25"; "beverlys"="45";
|
||||
"blackpearl"="25"; "borracho"="35"; "burgerdock"="25"; "cascadia"="25";
|
||||
"cedars"="45"; "centennial"="35"; "chaps"="45"; "chinook"="45";
|
||||
"chowderhead"="35"; "clinkerdagger"="45"; "cochinito"="25"; "collectivekitchen"="45";
|
||||
"dassteinhaus"="35"; "deleons"="25"; "deleonstexmex"="25"; "dockside"="35";
|
||||
"downriver"="45"; "dryfly"="35"; "durkins"="45"; "east"="45";
|
||||
"emrys"="25"; "feastworldkitchen"="35"; "flameandcork"="35"; "flatstick"="25";
|
||||
"flyinggoat"="25"; "fortheloveofgod"="35"; "francaise"="45"; "ganderryegrass"="35";
|
||||
"gardenparty"="35"; "gildedunicorn"="45"; "hang10"="25"; "heritage"="35";
|
||||
"hogwash"="45"; "honey"="35"; "hulapot"="35"; "indiahouse"="35";
|
||||
"indicana"="45"; "inlandpacifickitchen"="45"; "irongoat"="35"; "ironwoodice"="35";
|
||||
"karma"="35"; "kasa"="25"; "kismet"="35"; "kunisthai"="35";
|
||||
"latahbistro"="45"; "lebanon"="35"; "legendsoffire"="45"; "littledragon"="25";
|
||||
"littlenoodle"="25"; "longhornbbq"="25"; "loren"="45"; "lumberbeard"="35";
|
||||
"macdaddys"="35"; "mackenzieriver"="25"; "mammamias"="25"; "mangotree"="25";
|
||||
"maryhill"="45"; "masselowslounge"="45"; "max"="45"; "meltingpot"="45";
|
||||
"mortys"="25"; "northhill"="35"; "odohertys"="35"; "osprey"="35";
|
||||
"outsider"="45"; "palmcourtgrill"="45"; "ponderosa"="35"; "purenorthwest"="35";
|
||||
"purgatory"="45"; "qqsushi"="35"; "redtail"="35"; "republickitchen"="35";
|
||||
"republicpi"="25"; "rut"="35"; "safariroom"="45"; "saranac"="35";
|
||||
"satay"="45"; "sauced"="25"; "screamingyak"="25"; "seasons"="45";
|
||||
"shawnodonnells"="25"; "shelbys"="25"; "skewers"="25"; "southhillgrill"="45";
|
||||
"southperrylantern"="45"; "spencers"="45"; "steamplant"="35"; "steelhead"="35";
|
||||
"stylus"="35"; "sweetlous"="35"; "swinglounge"="35"; "table13"="45";
|
||||
"tavolata"="45"; "terraza"="35"; "thaibamboo"="25"; "thedambar"="45";
|
||||
"titos"="35"; "tomatostreet"="35"; "tonysonthelake"="45"; "torratea"="45";
|
||||
"truelegends"="25"; "twigs"="35"; "uprise"="25"; "vaqueros"="35";
|
||||
"vicinopizza"="25"; "victoryburger"="25"; "vieuxcarre"="35"; "vineolive"="45";
|
||||
"wileys"="45"
|
||||
}
|
||||
|
||||
function Decode-Html($str) {
    <#
    .SYNOPSIS
        Decodes HTML entities in scraped text and collapses whitespace.
    .PARAMETER str
        Raw text that may contain entities such as &amp;, &#039;, &quot; or &nbsp;.
    .OUTPUTS
        The decoded, trimmed string. Null or empty input is returned unchanged.
    #>
    if (-not $str) { return $str }

    # WebUtility decodes every named and numeric entity in a single pass, so an
    # escaped ampersand such as "&amp;lt;" becomes "&lt;" instead of being
    # decoded twice into "<".
    $decoded = [System.Net.WebUtility]::HtmlDecode([string]$str)

    # \s also matches the no-break space produced by &nbsp;, so any run of
    # whitespace collapses to one ordinary space before trimming.
    ($decoded -replace '\s+', ' ').Trim()
}
|
||||
|
||||
$priceFixed = 0
|
||||
$entitiesFixed = 0
|
||||
|
||||
foreach ($r in $data) {
|
||||
# Fix price from authoritative map
|
||||
if ($authPrices.ContainsKey($r.slug)) {
|
||||
$correctPrice = [int]$authPrices[$r.slug]
|
||||
if ($r.price -ne $correctPrice) {
|
||||
Write-Host "Price fix: $($r.slug) $($r.price) -> $correctPrice"
|
||||
$r.price = $correctPrice
|
||||
$priceFixed++
|
||||
}
|
||||
}
|
||||
|
||||
# Fix HTML entities
|
||||
$oldName = $r.name
|
||||
$r.name = Decode-Html $r.name
|
||||
$r.cuisine = Decode-Html $r.cuisine
|
||||
$r.menu.hours = Decode-Html $r.menu.hours
|
||||
if ($oldName -ne $r.name) { $entitiesFixed++ }
|
||||
|
||||
foreach ($course in @('First Course', 'Second Course', 'Third Course')) {
|
||||
$items = $r.menu.courses.$course
|
||||
if ($items) {
|
||||
foreach ($item in $items) {
|
||||
$item.name = Decode-Html $item.name
|
||||
$item.desc = Decode-Html $item.desc
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "Fixed prices: $priceFixed"
|
||||
Write-Host "Fixed names with entities: $entitiesFixed"
|
||||
Write-Host ""
|
||||
|
||||
# ---- Report remaining issues ----
|
||||
Write-Host "=== Remaining Issues ==="
|
||||
Write-Host ""
|
||||
|
||||
Write-Host "Zero-course restaurants (all 3 empty):"
|
||||
$data | Where-Object {
|
||||
$_.menu.courses.'First Course'.Count -eq 0 -and
|
||||
$_.menu.courses.'Second Course'.Count -eq 0 -and
|
||||
$_.menu.courses.'Third Course'.Count -eq 0
|
||||
} | ForEach-Object { Write-Host " $($_.slug) [$($_.price)] $($_.name)" }
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "Partial courses (any course count not 3):"
|
||||
$data | Where-Object {
|
||||
($_.menu.courses.'First Course'.Count -ne 3 -or
|
||||
$_.menu.courses.'Second Course'.Count -ne 3 -or
|
||||
$_.menu.courses.'Third Course'.Count -ne 3) -and
|
||||
-not (
|
||||
$_.menu.courses.'First Course'.Count -eq 0 -and
|
||||
$_.menu.courses.'Second Course'.Count -eq 0 -and
|
||||
$_.menu.courses.'Third Course'.Count -eq 0
|
||||
)
|
||||
} | ForEach-Object {
|
||||
$c1 = $_.menu.courses.'First Course'.Count
|
||||
$c2 = $_.menu.courses.'Second Course'.Count
|
||||
$c3 = $_.menu.courses.'Third Course'.Count
|
||||
Write-Host " $($_.slug) [$($_.price)]: $c1/$c2/$c3 - $($_.name)"
|
||||
}
|
||||
|
||||
# ---- Save ----
|
||||
$json = $data | ConvertTo-Json -Depth 10
|
||||
[System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8)
|
||||
Write-Host ""
|
||||
Write-Host "Saved to $jsonPath"
|
||||
49
memory/MEMORY.md
Normal file
49
memory/MEMORY.md
Normal file
@@ -0,0 +1,49 @@
|
||||
# Inlander Restaurant Week Picker - Project Memory
|
||||
|
||||
## Quick Reference
|
||||
- See `scraping-guide.md` for full year-scraping instructions and script templates
|
||||
- See `html-structures.md` for HTML parsing patterns per restaurant type
|
||||
- Project dir: `\\WinServ-20-3.chns.local\Profiles\derekc\Documents\Coding Projects\Gitea-CooperandGoodman-Inlander-Restaurant-Week-Picker\Inlander-Restaurant-Week-Picker`
|
||||
|
||||
## Key Constraints (CRITICAL)
|
||||
- **WebFetch cannot access web.archive.org** — use `curl` via Bash tool instead
|
||||
- **PowerShell cannot run scripts from UNC paths** (\\server\...) — always `cp` scripts to local temp first
|
||||
- **bash `/tmp`** = `C:\Users\DEREKC~1.CHN\AppData\Local\Temp` (8.3 short name)
|
||||
- **PowerShell temp** = `C:\Users\derekc.CHNSLocal\AppData\Local\Temp` (long name) — same dir, different string
|
||||
- **Wayback Machine rate limits** to ~20 requests before throttling with 429; use 3-5 sec delays, wait 30+ min after getting blocked
|
||||
|
||||
## JSON Schema
|
||||
Each entry in `YEAR-restaurants.json`:
|
||||
```json
|
||||
{
|
||||
"name": "Restaurant Name",
|
||||
"slug": "restaurantslug",
|
||||
"price": 45,
|
||||
"areas": ["Downtown"],
|
||||
"cuisine": "American",
|
||||
"url": "https://inlanderrestaurantweek.com/project/SLUG/",
|
||||
"menu": {
|
||||
"hours": "Menu served 5pm-close",
|
||||
"phone": "(509) 555-1234",
|
||||
"courses": {
|
||||
"First Course": [{"name": "Dish Name", "desc": "Description"}],
|
||||
"Second Course": [...],
|
||||
"Third Course": [...]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
Price is always 25, 35, or 45. gardenparty genuinely has 4 Third Course options.
|
||||
|
||||
## 2025 Data Status
|
||||
- **File**: `2025-restaurants.json` (121 restaurants)
|
||||
- **Wayback snapshot used**: `20250306132630` (primary), `20250401000000` (backup for some)
|
||||
- **Complete (3/3/3+)**: 111 restaurants
|
||||
- **gardenparty**: 3/3/4 — correct, it genuinely offers 4 dessert choices
|
||||
- **tavolata**: 3/3/0 — needs fix-tavolata.ps1 run when rate limit resets
|
||||
- **0/0/0 (JS-only, unrecoverable)**: heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza
|
||||
|
||||
## Scripts in Project Directory
|
||||
- `fix-tavolata.ps1` — run after rate limit resets to recover tavolata Third Course
|
||||
- Copy to local temp and run: `cp ...\fix-tavolata.ps1 C:\Users\derekc.CHNSLocal\AppData\Local\Temp\`
|
||||
- Then: `powershell.exe -ExecutionPolicy Bypass -File C:\Users\derekc.CHNSLocal\AppData\Local\Temp\fix-tavolata.ps1`
|
||||
152
memory/html-structures.md
Normal file
152
memory/html-structures.md
Normal file
@@ -0,0 +1,152 @@
|
||||
# IRW Website HTML Structure Reference
|
||||
|
||||
## Restaurant Page URL
|
||||
Live: `https://inlanderrestaurantweek.com/project/SLUG/`
|
||||
Archived: `https://web.archive.org/web/TIMESTAMP/https://inlanderrestaurantweek.com/project/SLUG/`
|
||||
|
||||
## Page Framework
|
||||
The site uses WordPress + Divi theme. Relevant container class: `et_pb_text_inner`.
|
||||
Each course section typically occupies one or two `et_pb_text_inner` divs.
|
||||
|
||||
---
|
||||
|
||||
## Course Layout Types
|
||||
|
||||
### Layout A — Heading and items in SEPARATE divs (most restaurants)
|
||||
```html
|
||||
<div class="et_pb_text_inner"><h3>First Course</h3></div>
|
||||
<div class="et_pb_text_inner">
|
||||
<p><strong>Dish Name</strong><br/>Description</p>
|
||||
<p><strong>Dish Name 2</strong><br/>Description 2</p>
|
||||
</div>
|
||||
<div class="et_pb_text_inner"><h3>Second Course</h3></div>
|
||||
...
|
||||
```
|
||||
|
||||
### Layout B — Heading and items in SAME div (tavolata, durkins, table13, others)
|
||||
```html
|
||||
<div class="et_pb_text_inner">
|
||||
<h3>First Course</h3>
|
||||
<p><strong>Dish Name</strong><br/>Description</p>
|
||||
<p><strong>Dish Name 2</strong><br/>Description 2</p>
|
||||
</div>
|
||||
<div class="et_pb_text_inner">
|
||||
<h3>Second Course</h3>
|
||||
...
|
||||
</div>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dish Name Tag Styles
|
||||
|
||||
### Style 1 — `<strong>` tag (most restaurants)
|
||||
Examples: 315cuisine, anthonys, bardenay, barkrescuepub, etc.
|
||||
```html
|
||||
<p><strong>Dish Name</strong><br/>Description text here</p>
|
||||
<p><strong>Dish Name</strong> <br/>With space before br</p>
|
||||
```
|
||||
|
||||
### Style 2 — `<b>` tag with `<br/>` inside (India House, Lebanon, Karma, ponderosa)
|
||||
```html
|
||||
<p><b>Dish Name <br/></b><span>Description text</span></p>
|
||||
<p><b>Dish Name<br/></b> Description without span</p>
|
||||
```
|
||||
Key: name is inside `<b>`, the `<br/>` is INSIDE the `<b>` tag.
|
||||
|
||||
### Style 3 — `<b>` + `<strong>` combo (1898 restaurant)
|
||||
```html
|
||||
<p><span><b>First Part</b></span><strong>Second Part</strong> Description</p>
|
||||
```
|
||||
Full dish name = "First Part" + " " + "Second Part"
|
||||
|
||||
---
|
||||
|
||||
## Field Extraction Patterns
|
||||
|
||||
### Name (from page title)
|
||||
```
|
||||
<title>Restaurant Name | Inlander Restaurant Week</title>
|
||||
```
|
||||
Regex: `<title>(.+?) \| Inlander`
|
||||
|
||||
### Price (WARNING: unreliable — use price listing page instead)
|
||||
```html
|
||||
<h1 style="text-align: left;"><strong>$45</strong></h1>
|
||||
```
|
||||
Regex: `<strong>\$(\d+)</strong>`
|
||||
PROBLEM: Some pages show drink prices like $22 that match before the real price.
|
||||
SOLUTION: Always build an authoritative slug→price map from the price listing page.
|
||||
|
||||
### Price Listing Page — Authoritative Prices
|
||||
URL: `https://inlanderrestaurantweek.com/price/` (or Wayback archived version)
|
||||
```html
|
||||
<article class="et_pb_portfolio_item ... project_category_45 ...">
|
||||
...
|
||||
<a href="https://inlanderrestaurantweek.com/project/SLUG/">
|
||||
```
|
||||
Extract price tier from `project_category_(25|35|45)` CSS class.
|
||||
Extract slug from `href=".../project/SLUG/"`.
|
||||
|
||||
### Cuisine
|
||||
```html
|
||||
CUISINE: AMERICAN COMFORT FOOD
|
||||
```
|
||||
Often inside `<strong>` or `<em>` tags. Extract uppercase text after "CUISINE:".
|
||||
Apply `.ToTitleCase()` for proper formatting.
|
||||
|
||||
### Phone
|
||||
Area codes: 509 (Spokane area) or 208 (Idaho/CDA area)
|
||||
Pattern: `(509) 555-1234` or `(208) 555-1234`
|
||||
Regex: `\((?:208|509)\) \d{3}-\d{4}`
|
||||
|
||||
### Hours
|
||||
```
|
||||
Menu served 5pm-9pm nightly
|
||||
Menu served Thursday-Sunday, 5-9pm
|
||||
```
|
||||
Regex: `Menu served [^<]+`
|
||||
|
||||
### Area
|
||||
Look for area keywords (ALL CAPS in source) anywhere in the HTML:
|
||||
- DOWNTOWN, NORTH SPOKANE, SOUTH SPOKANE, WEST SPOKANE, SPOKANE VALLEY
|
||||
- AIRWAY HEIGHTS, LIBERTY LAKE, COEUR D'ALENE, POST FALLS, HAYDEN, ATHOL, WORLEY
|
||||
Default to ["Downtown"] if nothing matched.
|
||||
Some restaurants appear in multiple areas — collect all matches.
|
||||
|
||||
---
|
||||
|
||||
## Dietary Tag Filtering
|
||||
Skip these as dish names — they appear in `<strong>` but are dietary labels, not dish names:
|
||||
- GF (gluten free)
|
||||
- GFA (gluten free available)
|
||||
- V, V+ (vegetarian, vegan)
|
||||
- DF, DFA (dairy free, dairy free available)
|
||||
- V:, V+A (legend lines)
|
||||
- 2025 (year marker some restaurants include)
|
||||
- Drink (some restaurants label beverage course)
|
||||
|
||||
Full regex: `^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$`
|
||||
Also skip names matching `^[A-Z]{1,3}:` (legend lines like "GF: Gluten Free")
|
||||
Also skip names shorter than 3 chars or longer than 80 chars.
|
||||
|
||||
---
|
||||
|
||||
## Restaurants by Known HTML Style (2025)
|
||||
|
||||
**Layout B (same-block)**: tavolata, durkins, table13, terraza, and others
|
||||
**Style 2 (`<b>` tags)**: indiahouse, lebanon, karma, ponderosa, collectivekitchen, dryfly, masselowslounge, vieuxcarre, wileys, osprey, shawnodonnells, ganderryegrass
|
||||
**Style 3 (`<b>`+`<strong>` combo)**: 1898
|
||||
|
||||
Note: These styles may change year to year as restaurants update their pages.
|
||||
Always check a few representative pages before assuming the same structure applies.
|
||||
|
||||
---
|
||||
|
||||
## JS-Only Pages (no static HTML menu content)
|
||||
These restaurants had no recoverable menu data from any Wayback snapshot in 2025:
|
||||
heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza
|
||||
|
||||
Their pages are fully JS-rendered — the static HTML captured by Wayback Machine
|
||||
shows the page shell but not the menu content. For future years, these may or may not
|
||||
have static caches depending on server-side rendering changes.
|
||||
237
memory/scraping-guide.md
Normal file
237
memory/scraping-guide.md
Normal file
@@ -0,0 +1,237 @@
|
||||
# IRW Scraping Guide — Full Process for Adding a New Year
|
||||
|
||||
## Overview
|
||||
The Inlander Restaurant Week website (inlanderrestaurantweek.com) is WordPress/Divi.
|
||||
Menu pages are partially JS-rendered but WP-Super-Cache creates static HTML snapshots
|
||||
that the Wayback Machine archives. We scrape those static snapshots.
|
||||
|
||||
---
|
||||
|
||||
## Step 1: Find Restaurant Slugs
|
||||
|
||||
Fetch the price listing page to get all slugs for that year:
|
||||
```bash
|
||||
curl -s "https://web.archive.org/web/TIMESTAMP/https://inlanderrestaurantweek.com/price/" \
|
||||
-o /tmp/irw-price-YEAR.html
|
||||
```
|
||||
|
||||
Pick a timestamp close to the event (Wayback Machine format: YYYYMMDDHHmmss).
|
||||
The price listing page has portfolio items like:
|
||||
```html
|
||||
<article class="et_pb_portfolio_item ... project_category_45">
|
||||
<a href="https://inlanderrestaurantweek.com/project/SLUG/">
|
||||
```
|
||||
Extract slug from the href. The class `project_category_(25|35|45)` gives authoritative price.
|
||||
|
||||
**Important**: Scrape the price listing page FIRST and save the slug→price map.
|
||||
Some restaurant pages have drink prices ($22, $33) that confuse the price parser.
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Scrape Each Restaurant Page
|
||||
|
||||
Use a PowerShell script (written to project dir, copied to local temp to run):
|
||||
|
||||
**Wayback Machine URL format**:
|
||||
```
|
||||
https://web.archive.org/web/TIMESTAMP/https://inlanderrestaurantweek.com/project/SLUG/
|
||||
```
|
||||
|
||||
**Key fields to extract**:
|
||||
```powershell
|
||||
# Name
|
||||
$nameM = [regex]::Match($html, '<title>(.+?) \| Inlander')
|
||||
|
||||
# Price (from page, but USE PRICE LISTING MAP - this can be wrong)
|
||||
$priceM = [regex]::Match($html, '<strong>\$(\d+)</strong>')
|
||||
|
||||
# Cuisine
|
||||
$cuisineM = [regex]::Match($html, 'CUISINE:\s*([A-Z][A-Za-z/ ]+?)(?:\s*</|\s*<)')
|
||||
$cuisine = (Get-Culture).TextInfo.ToTitleCase($cuisineM.Groups[1].Value.Trim().ToLower())
|
||||
|
||||
# Phone
|
||||
$phoneM = [regex]::Match($html, '\((?:208|509)\) \d{3}-\d{4}')
|
||||
|
||||
# Hours
|
||||
$hoursM = [regex]::Match($html, 'Menu served [^<]+')
|
||||
|
||||
# Area (match against known area keys, case-insensitive)
|
||||
$areaMap keys: "AIRWAY HEIGHTS","ATHOL","COEUR D'ALENE","POST FALLS","HAYDEN",
|
||||
"LIBERTY LAKE","NORTH SPOKANE","SOUTH SPOKANE","SPOKANE VALLEY",
|
||||
"WEST SPOKANE","WORLEY","DOWNTOWN"
|
||||
```
|
||||
|
||||
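**Rate limiting**: Add at least `Start-Sleep -Milliseconds 2000` between each request (3–5 seconds is safer — the Wayback Machine starts returning 429 after roughly 20 requests).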
|
||||
After a 429, stop and wait 30+ minutes before trying again.
|
||||
|
||||
---
|
||||
|
||||
## Step 3: Parse Menu Courses
|
||||
|
||||
### Course Block Extraction (`Get-CourseBlock`)
|
||||
Two HTML layouts exist:
|
||||
|
||||
**Layout A** (most common): heading and items in SEPARATE `et_pb_text_inner` blocks
|
||||
```powershell
|
||||
# Strategy 1: find content between this label and next label
|
||||
$m = [regex]::Match($html, [regex]::Escape($label) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')', $opts)
|
||||
|
||||
# Strategy 3 (fallback): items in next et_pb_text_inner block
|
||||
$im = [regex]::Match($sub, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $opts)
|
||||
```
|
||||
|
||||
**Layout B** (some restaurants — tavolata, durkins, table13, etc.): heading + items in SAME block
|
||||
```powershell
|
||||
# Strategy 2: extract <p> tags after </h3> within same div
|
||||
$sameDivM = [regex]::Match($sub, '(?s)</h[123]>\s*(<p.+?)(?=</div>)', $opts)
|
||||
```
|
||||
|
||||
### Dish Parsing (`Parse-Dish`)
|
||||
Three tag styles exist:
|
||||
|
||||
**Style 1** (most restaurants): `<strong>` for name
|
||||
```html
|
||||
<p><strong>Dish Name</strong><br/>Description text</p>
|
||||
```
|
||||
|
||||
**Style 2** (India House, Lebanon, Karma, others): `<b>` with `<br/>` before `</b>`
|
||||
```html
|
||||
<p><b>Dish Name <br/></b><span>Description text</span></p>
|
||||
```
|
||||
|
||||
**Style 3** (1898): `<b>` + `<strong>` combination
|
||||
```html
|
||||
<p><span><b>Part1</b></span><strong>Part2</strong> Description</p>
|
||||
```
|
||||
|
||||
**Multi-strategy parser** (handles all three):
|
||||
```powershell
|
||||
function Parse-Dish($pContent) {
|
||||
$opts = [System.Text.RegularExpressions.RegexOptions]::Singleline
|
||||
|
||||
# Style 2: <b>Name <br/></b>
|
||||
$bWithBrM = [regex]::Match($pContent, '(?s)<b>(.*?)<br\s*/?>', $opts)
|
||||
if ($bWithBrM.Success) {
|
||||
$name = Get-CleanText $bWithBrM.Groups[1].Value
|
||||
if (Test-ValidDishName $name) {
|
||||
$desc = Get-CleanText ($pContent.Substring($bWithBrM.Index + $bWithBrM.Length))
|
||||
return [PSCustomObject]@{ name = $name; desc = $desc }
|
||||
}
|
||||
}
|
||||
|
||||
# Style 3: <b>Part1</b>...<strong>Part2</strong>
|
||||
$bM = [regex]::Match($pContent, '(?s)<b>(.*?)</b>', $opts)
|
||||
if ($bM.Success) {
|
||||
$namePart = Get-CleanText $bM.Groups[1].Value
|
||||
if (Test-ValidDishName $namePart) {
|
||||
$afterB = $pContent.Substring($bM.Index + $bM.Length)
|
||||
$sM2 = [regex]::Match($afterB, '(?s)^[^<]*<strong>(.*?)</strong>(.*)', $opts)
|
||||
if ($sM2.Success) {
|
||||
$p2 = Get-CleanText $sM2.Groups[1].Value
|
||||
if (-not (Test-DietaryTag $p2) -and $p2.Length -ge 2) {
|
||||
return [PSCustomObject]@{ name = "$namePart $p2".Trim(); desc = Get-CleanText $sM2.Groups[2].Value }
|
||||
}
|
||||
}
|
||||
return [PSCustomObject]@{ name = $namePart; desc = Get-CleanText $afterB }
|
||||
}
|
||||
}
|
||||
|
||||
# Style 1: <strong>Name</strong>
|
||||
$sM = [regex]::Match($pContent, '(?s)<strong>(.*?)</strong>', $opts)
|
||||
if ($sM.Success) {
|
||||
$name = Get-CleanText $sM.Groups[1].Value
|
||||
if (-not (Test-ValidDishName $name)) { return $null }
|
||||
$afterBr = ''
|
||||
if ($pContent -match '(?s)<br\s*/?>(.*?)$') { $afterBr = $matches[1] }
|
||||
else { $am = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $opts); if ($am.Success) { $afterBr = $am.Groups[1].Value } }
|
||||
return [PSCustomObject]@{ name = $name; desc = Get-CleanText $afterBr }
|
||||
}
|
||||
return $null
|
||||
}
|
||||
|
||||
function Test-ValidDishName($name) {
    # Returns $true when a cleaned string is acceptable as a dish name:
    # 3 to 80 characters long, not a bare dietary/legend tag, and not a
    # legend line such as "GF: ...".
    $length = $name.Length
    if ($length -lt 3) { return $false }
    if ($length -gt 80) { return $false }

    # Bare tags (GF, V+, DFA, ...) are markers, not dishes.
    if ($name -match '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$') { return $false }

    # One to three letters followed by a colon is a legend entry.
    return ($name -notmatch '^[A-Z]{1,3}:')
}
|
||||
|
||||
function Test-DietaryTag($str) {
    # Returns $true when the whole string is one of the known dietary/legend
    # markers (GF, GFA, V, V+, DF, DFA, V:, 2025, Drink, V+A).
    $tagPattern = '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$'
    return ($str -match $tagPattern)
}
|
||||
```
|
||||
|
||||
### HTML Cleanup
|
||||
```powershell
|
||||
function Get-CleanText($rawHtml) {
    # Converts an HTML fragment to plain single-line text:
    #   1. replaces every tag with a space,
    #   2. decodes HTML entities (&amp;, &#39;, &quot;, &lt;, &gt;, &nbsp;, &#8211;, ...),
    #   3. normalises en/em dashes to '-',
    #   4. collapses whitespace runs (including non-breaking spaces) and trims.
    #
    # Tags are stripped BEFORE decoding so that escaped markup such as
    # "&lt;b&gt;" survives as literal text instead of being removed.
    $text = $rawHtml -replace '<[^>]+>', ' '

    # HtmlDecode handles every named and numeric entity in a single pass, so
    # "&amp;lt;" correctly becomes "&lt;" rather than being decoded twice.
    $text = [System.Net.WebUtility]::HtmlDecode($text)

    # Built from code points so the script does not depend on its own file encoding.
    $dashClass = '[' + [char]0x2013 + [char]0x2014 + ']'
    $text = $text -replace $dashClass, '-'

    ($text -replace '\s+', ' ').Trim()
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 4: Fix Prices
|
||||
|
||||
After scraping, apply authoritative prices from the price listing page:
|
||||
- Parse `project_category_(25|35|45)` CSS class from portfolio items
|
||||
- Match slug from adjacent `href` attribute
|
||||
- Build a hashtable and apply to all entries
|
||||
|
||||
Common gotcha: Restaurant pages may show $22 (wine), $33 (lunch) — these are NOT the event price.
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Recover Missing Restaurants
|
||||
|
||||
If a restaurant has 0/0/0 courses:
|
||||
1. Try alternate Wayback timestamps: `20250401000000`, `20250415000000`, `20250501000000`, `20250601000000`
|
||||
2. Check if page uses Layout B (same-block) — add Strategy 2 to course block extractor
|
||||
3. Check if page uses `<b>` tags instead of `<strong>` for dish names
|
||||
|
||||
**Known JS-only restaurants** (no static cache recoverable for 2025):
|
||||
heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza
|
||||
|
||||
---
|
||||
|
||||
## Step 6: Output and Validation
|
||||
|
||||
```powershell
|
||||
# Save as UTF-8 WITHOUT a byte-order mark (important -- special characters in
# restaurant names). [System.Text.Encoding]::UTF8 would prepend a BOM, which
# strict JSON parsers reject, so an explicit BOM-less encoder is used.
$json = $data | ConvertTo-Json -Depth 10
$utf8NoBom = New-Object System.Text.UTF8Encoding $false
[System.IO.File]::WriteAllText($outPath, $json, $utf8NoBom)

# Validate: list any restaurant not at 3/3/3
$data | Where-Object {
    $_.menu.courses.'First Course'.Count -ne 3 -or
    $_.menu.courses.'Second Course'.Count -ne 3 -or
    $_.menu.courses.'Third Course'.Count -ne 3
} | ForEach-Object {
    $courses = $_.menu.courses
    "$($_.slug): $($courses.'First Course'.Count)/$($courses.'Second Course'.Count)/$($courses.'Third Course'.Count)"
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## PowerShell Script Execution Pattern (REQUIRED)
|
||||
|
||||
```bash
|
||||
# Write script to project dir (via Write tool or Edit)
|
||||
# Then in bash:
|
||||
cp "//WinServ-20-3.chns.local/Profiles/derekc/Documents/Coding Projects/.../script.ps1" \
|
||||
"/c/Users/derekc.CHNSLocal/AppData/Local/Temp/script.ps1"
|
||||
powershell.exe -ExecutionPolicy Bypass -File "C:\Users\derekc.CHNSLocal\AppData\Local\Temp\script.ps1"
|
||||
```
|
||||
|
||||
**Never** use `powershell -Command "..."` for multi-line scripts — escaping is unreliable.
|
||||
**Never** try to run `.ps1` directly from `\\WinServ-20-3...` UNC path — execution policy blocks it.
|
||||
|
||||
---
|
||||
|
||||
## PowerShell Gotchas
|
||||
- `"$slug: text"` fails if `:` follows var — use `"${slug}: text"`
|
||||
- Function names like `Is-X`, `Decode-X`, `Parse-X` get PSScriptAnalyzer warnings (unapproved verbs) but work fine
|
||||
- `return ,$array` (comma prefix) forces PowerShell to return an array, not unroll it
|
||||
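- `[System.IO.File]::WriteAllText(path, json, (New-Object System.Text.UTF8Encoding $false))` — use this, not `Out-File`, to avoid BOM/encoding issues (note: passing `[System.Text.Encoding]::UTF8` still writes a BOM)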
|
||||
@@ -1,179 +0,0 @@
|
||||
# rescrape-missing.ps1 - Re-fetches 0-course and partial restaurants
|
||||
# using CDX API to find best available Wayback Machine snapshot
|
||||
|
||||
$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
||||
$jsonPath = Join-Path $projectDir '2025-restaurants.json'
|
||||
|
||||
$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json
|
||||
|
||||
function Decode-Html($str) {
    # Decodes HTML entities in $str, collapses whitespace runs to one space
    # and trims the result. Empty or $null input is returned unchanged.
    if (-not $str) { return $str }

    # WebUtility.HtmlDecode covers all named and numeric entities in one pass
    # (&amp;, &#39;, &quot;, &lt;, &gt;, &nbsp;, ...) and never decodes twice.
    $decoded = [System.Net.WebUtility]::HtmlDecode($str)

    # \s also matches the non-breaking space produced by &nbsp;.
    ($decoded -replace '\s+', ' ').Trim()
}
|
||||
|
||||
function Get-CleanText($rawHtml) {
    # Replaces every HTML tag with a space, passes the remainder through
    # Decode-Html, and returns the trimmed result.
    $withoutTags = $rawHtml -replace '<[^>]+>', ' '
    $decoded = Decode-Html $withoutTags
    return $decoded.Trim()
}
|
||||
|
||||
function Invoke-Dishes($courseHtml) {
    # Extracts the dishes of one course from its HTML fragment.
    # Every <p> whose first <strong> holds a plausible dish name yields one
    # object with 'name' and 'desc'. Returns an ArrayList (wrapped with the
    # comma operator so an empty or single-item list is not unrolled).
    $dishes = [System.Collections.ArrayList]@()
    $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline

    foreach ($pm in [regex]::Matches($courseHtml, '<p[^>]*>(.*?)</p>', $opts)) {
        $pContent = $pm.Groups[1].Value

        # The first <strong> of the paragraph is the dish name.
        $nameM = [regex]::Match($pContent, '<strong>(.*?)</strong>', $opts)
        if (-not $nameM.Success) { continue }
        $name = Get-CleanText $nameM.Groups[1].Value

        # Skip bare dietary tags and "2025..." / "Drink..." lines.
        # The dietary codes are anchored with $: -match is case-insensitive, so
        # the former unanchored 'V\+?' prefix rejected every dish starting with
        # "V" (Veal, Vegetable ...), and 'GF'/'DF' rejected names such as
        # "GF Chocolate Cake". The 2025/Drink prefixes keep their prefix meaning.
        if ($name -match '^(?:(?:GF|GFA|V\+?|DF|DFA|V\+A|V:)$|2025|Drink)') { continue }
        if ($name.Length -lt 3 -or $name.Length -gt 80) { continue }
        # Legend lines such as "GF: gluten free".
        if ($name -match '^[A-Z]{1,3}:') { continue }

        # Description: text after the first <br>, else the text after </strong>.
        $afterBr = ''
        if ($pContent -match '(?s)<br\s*/?>(.*?)$') {
            $afterBr = $matches[1]
        }
        else {
            $afterStrong = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $opts)
            if ($afterStrong.Success) { $afterBr = $afterStrong.Groups[1].Value }
        }

        $desc = Get-CleanText $afterBr
        $null = $dishes.Add([PSCustomObject]@{ name = $name; desc = $desc })
    }

    return ,$dishes
}
|
||||
|
||||
function Invoke-CourseBlock($html, $courseLabel, $nextLabel) {
    # Returns the HTML belonging to one course, or '' when it cannot be found.
    #   - With $nextLabel: the text between the first $courseLabel and the
    #     following $nextLabel.
    #   - Otherwise (or when that fails): the first non-heading
    #     et_pb_text_inner block within 6000 characters of $courseLabel.
    $singleLine = [System.Text.RegularExpressions.RegexOptions]::Singleline

    if ($nextLabel) {
        $between = [regex]::Escape($courseLabel) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')'
        $span = [regex]::Match($html, $between, $singleLine)
        if ($span.Success) { return $span.Groups[1].Value }
    }

    $labelAt = $html.IndexOf($courseLabel)
    if ($labelAt -lt 0) {
        return ''
    }

    # Limit the search window so an unrelated block further down is not picked up.
    $windowLength = [Math]::Min(6000, $html.Length - $labelAt)
    $window = $html.Substring($labelAt, $windowLength)
    $block = [regex]::Match($window, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $singleLine)
    if ($block.Success) {
        return $block.Groups[1].Value
    }
    return ''
}
|
||||
|
||||
function Parse-RestaurantHtml($html) {
    # Parses one restaurant page into a hashtable with the keys
    # first / second / third (dish lists per course), hours and phone.
    # hours and phone are '' when the page does not contain them.
    $firstHtml  = Invoke-CourseBlock $html 'First Course' 'Second Course'
    $secondHtml = Invoke-CourseBlock $html 'Second Course' 'Third Course'
    $thirdHtml  = Invoke-CourseBlock $html 'Third Course' $null

    $hoursText = ''
    if ($html -match 'Menu served ([^<]+)') {
        $hoursText = "Menu served $($matches[1].Trim())"
    }

    # Only the two area codes matched by the pattern (208, 509) are recognised.
    $phoneText = ''
    if ($html -match '\((?:208|509)\) \d{3}-\d{4}') {
        $phoneText = $matches[0]
    }

    $parsed = @{}
    $parsed.first  = Invoke-Dishes $firstHtml
    $parsed.second = Invoke-Dishes $secondHtml
    $parsed.third  = Invoke-Dishes $thirdHtml
    $parsed.hours  = $hoursText
    $parsed.phone  = $phoneText
    return $parsed
}
|
||||
|
||||
# Find which restaurants need re-scraping: every restaurant whose menu is not
# exactly 3/3/3 (this already includes the completely empty 0/0/0 case).
# @() forces an array so .Count is correct for zero or one match.
$needsRescrape = @($data | Where-Object {
    $_.menu.courses.'First Course'.Count -ne 3 -or
    $_.menu.courses.'Second Course'.Count -ne 3 -or
    $_.menu.courses.'Third Course'.Count -ne 3
})

Write-Host "Restaurants to re-scrape: $($needsRescrape.Count)"
Write-Host ""

foreach ($r in $needsRescrape) {
    $slug = $r.slug
    Write-Host "[$slug] Looking up CDX snapshots..." -NoNewline

    try {
        # CDX API: find snapshots from Jan-May 2025
        $cdxUrl = "https://web.archive.org/cdx/search/cdx?url=inlanderrestaurantweek.com/project/$slug/&output=text&limit=10&from=20250101&to=20250501&filter=statuscode:200&fl=timestamp"
        $cdxResp = Invoke-WebRequest -Uri $cdxUrl -UseBasicParsing -TimeoutSec 30 -ErrorAction Stop

        # Trim each line so a trailing carriage return cannot defeat the 14-digit check.
        $timestamps = @(
            $cdxResp.Content -split "`n" |
                ForEach-Object { $_.Trim() } |
                Where-Object { $_ -match '^\d{14}$' }
        )

        if ($timestamps.Count -eq 0) {
            Write-Host " No CDX snapshots found"
            continue
        }

        Write-Host " Found $($timestamps.Count) snapshots"

        # What the JSON currently holds for this restaurant.
        $curr1 = $r.menu.courses.'First Course'.Count
        $curr2 = $r.menu.courses.'Second Course'.Count
        $curr3 = $r.menu.courses.'Third Course'.Count

        # Track the best snapshot seen so far. Comparing each snapshot only with
        # the current data would let a later, poorer snapshot overwrite a better
        # earlier one.
        $best = $null
        $bestTotal = $curr1 + $curr2 + $curr3

        foreach ($ts in $timestamps) {
            Write-Host "  Trying $ts..." -NoNewline
            try {
                $pageUrl = "https://web.archive.org/web/$ts/https://inlanderrestaurantweek.com/project/$slug/"
                $resp = Invoke-WebRequest -Uri $pageUrl -UseBasicParsing -TimeoutSec 45 -ErrorAction Stop
                $parsed = Parse-RestaurantHtml $resp.Content

                $c1 = $parsed.first.Count
                $c2 = $parsed.second.Count
                $c3 = $parsed.third.Count
                Write-Host " $c1/$c2/$c3"

                $newTotal = $c1 + $c2 + $c3
                $isComplete = ($c1 -ge 3 -and $c2 -ge 3 -and $c3 -ge 3)

                if ($isComplete -or $newTotal -gt $bestTotal) {
                    $best = $parsed
                    $bestTotal = $newTotal
                    # A complete menu cannot be improved on; stop fetching.
                    if ($isComplete) { break }
                }
            } catch {
                # Report the reason (timeout, 429, ...) instead of hiding it.
                Write-Host " FETCH ERROR: $_"
            }
            Start-Sleep -Milliseconds 400
        }

        $improves = $false
        if ($best) {
            $improves = ($best.first.Count -gt $curr1) -or
                        ($best.second.Count -gt $curr2) -or
                        ($best.third.Count -gt $curr3)
        }

        if ($improves) {
            Write-Host "  -> Updating with $($best.first.Count)/$($best.second.Count)/$($best.third.Count) courses"
            $r.menu.courses.'First Course' = @($best.first)
            $r.menu.courses.'Second Course' = @($best.second)
            $r.menu.courses.'Third Course' = @($best.third)
            # Contact details are only filled in, never overwritten.
            if ($best.hours -and -not $r.menu.hours) { $r.menu.hours = $best.hours }
            if ($best.phone -and -not $r.menu.phone) { $r.menu.phone = $best.phone }
        } else {
            Write-Host "  -> No improvement found"
        }

    } catch {
        Write-Host " CDX ERROR: $_"
    }
    Start-Sleep -Milliseconds 500
}

# ---- Final report ----
Write-Host ""
Write-Host "=== Final Status ==="
foreach ($entry in $data) {
    $c1 = $entry.menu.courses.'First Course'.Count
    $c2 = $entry.menu.courses.'Second Course'.Count
    $c3 = $entry.menu.courses.'Third Course'.Count
    if ($c1 -ne 3 -or $c2 -ne 3 -or $c3 -ne 3) {
        Write-Host "  $($entry.slug): $c1/$c2/$c3"
    }
}

# Write UTF-8 without a BOM; [System.Text.Encoding]::UTF8 would prepend one.
$json = $data | ConvertTo-Json -Depth 10
$utf8NoBom = New-Object System.Text.UTF8Encoding $false
[System.IO.File]::WriteAllText($jsonPath, $json, $utf8NoBom)
Write-Host ""
Write-Host "Saved to $jsonPath"
|
||||
@@ -1,166 +0,0 @@
|
||||
# rescrape2-missing.ps1 - Re-fetches problematic restaurants with multiple timestamps
|
||||
# Uses fixed timestamps (no CDX API) with generous delays to avoid rate limiting
|
||||
|
||||
$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
||||
$jsonPath = Join-Path $projectDir '2025-restaurants.json'
|
||||
|
||||
$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json
|
||||
|
||||
function Decode-Html($str) {
    # Returns $str with HTML entities decoded, whitespace runs collapsed to a
    # single space, and leading/trailing whitespace removed.
    # Empty or $null input is handed back as-is.
    if (-not $str) { return $str }

    # One-pass decoding of every named/numeric entity (&amp;, &#39;, &quot;,
    # &lt;, &gt;, &nbsp; ...); the non-breaking space from &nbsp; is folded
    # into ordinary spaces by the \s+ replacement.
    ([System.Net.WebUtility]::HtmlDecode($str) -replace '\s+', ' ').Trim()
}
|
||||
|
||||
function Get-CleanText($rawHtml) {
    # Swaps each HTML tag for a space, then lets Decode-Html do the entity
    # decoding and whitespace clean-up.
    $tagFree = $rawHtml -replace '<[^>]+>', ' '
    return (Decode-Html $tagFree)
}
|
||||
|
||||
function Invoke-Dishes($courseHtml) {
    # Builds the dish list for one course. Each <p> that contains a <strong>
    # with a plausible dish name contributes one { name; desc } object.
    # Returns an ArrayList; the leading comma stops PowerShell from unrolling it.
    $dishes = [System.Collections.ArrayList]@()
    $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline

    $paragraphs = [regex]::Matches($courseHtml, '<p[^>]*>(.*?)</p>', $opts)
    foreach ($pm in $paragraphs) {
        $pContent = $pm.Groups[1].Value

        $nameM = [regex]::Match($pContent, '<strong>(.*?)</strong>', $opts)
        if (-not $nameM.Success) { continue }
        $name = Get-CleanText $nameM.Groups[1].Value

        # Reject bare dietary codes and "2025..." / "Drink..." lines.
        # The codes must match the WHOLE name: the earlier pattern had no end
        # anchor and -match ignores case, so 'V\+?' alone discarded every dish
        # beginning with "V", and 'GF'/'DF' discarded e.g. "GF Brownie".
        $isMarker = $name -match '^(?:(?:GF|GFA|V\+?|DF|DFA|V\+A|V:)$|2025|Drink)'
        $badLength = ($name.Length -lt 3) -or ($name.Length -gt 80)
        # Legend entries look like "GF: gluten free".
        $isLegend = $name -match '^[A-Z]{1,3}:'
        if ($isMarker -or $badLength -or $isLegend) { continue }

        # Description: what follows the first <br>, otherwise what follows </strong>.
        $afterBr = ''
        if ($pContent -match '(?s)<br\s*/?>(.*?)$') {
            $afterBr = $matches[1]
        }
        else {
            $am = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $opts)
            if ($am.Success) { $afterBr = $am.Groups[1].Value }
        }

        $null = $dishes.Add([PSCustomObject]@{ name = $name; desc = (Get-CleanText $afterBr) })
    }

    return ,$dishes
}
|
||||
|
||||
function Invoke-CourseBlock($html, $courseLabel, $nextLabel) {
    # Locates the HTML of a single course and returns it ('' if not found).
    # First choice: the span from $courseLabel up to $nextLabel (when given).
    # Fallback: the first et_pb_text_inner block that does not start with a
    # heading, searched in the 6000 characters following $courseLabel.
    $regexOptions = [System.Text.RegularExpressions.RegexOptions]::Singleline
    $found = ''

    if ($nextLabel) {
        $spanPattern = [regex]::Escape($courseLabel) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')'
        $spanMatch = [regex]::Match($html, $spanPattern, $regexOptions)
        if ($spanMatch.Success) { return $spanMatch.Groups[1].Value }
    }

    $start = $html.IndexOf($courseLabel)
    if ($start -ge 0) {
        $take = [Math]::Min(6000, $html.Length - $start)
        $nearby = $html.Substring($start, $take)
        $inner = [regex]::Match($nearby, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $regexOptions)
        if ($inner.Success) { $found = $inner.Groups[1].Value }
    }

    return $found
}
|
||||
|
||||
function Fetch-And-Parse($url) {
    # Downloads $url and parses it into a hashtable with the keys
    # first / second / third (dish lists), hours, phone and total (always 0).
    # Throws on any request failure, and throws "Rate limited" when the
    # returned body contains the text "429 Too Many Requests".
    $response = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 45 -ErrorAction Stop
    $html = $response.Content

    # Check if it's a 429 page
    if ($html -match '429 Too Many Requests') {
        throw "Rate limited"
    }

    $firstHtml  = Invoke-CourseBlock $html 'First Course' 'Second Course'
    $secondHtml = Invoke-CourseBlock $html 'Second Course' 'Third Course'
    $thirdHtml  = Invoke-CourseBlock $html 'Third Course' $null

    $hoursText = ''
    if ($html -match 'Menu served ([^<]+)') {
        $hoursText = "Menu served $($matches[1].Trim())"
    }

    $phoneText = ''
    if ($html -match '\((?:208|509)\) \d{3}-\d{4}') {
        $phoneText = $matches[0]
    }

    $parsed = @{}
    $parsed.first  = Invoke-Dishes $firstHtml
    $parsed.second = Invoke-Dishes $secondHtml
    $parsed.third  = Invoke-Dishes $thirdHtml
    $parsed.hours  = $hoursText
    $parsed.phone  = $phoneText
    $parsed.total  = 0
    return $parsed
}
|
||||
|
||||
# Timestamps to try for each restaurant (spanning Mar-May 2025)
$timestamps = @(
    '20250301000000',
    '20250308000000',
    '20250315000000',
    '20250401000000',
    '20250415000000',
    '20250501000000'
)

# Find problematic restaurants: anything that is not exactly 3/3/3 (which
# already covers 0/0/0). @() keeps the result an array so .Count is correct
# when zero or one restaurant matches.
$problems = @($data | Where-Object {
    $_.menu.courses.'First Course'.Count -ne 3 -or
    $_.menu.courses.'Second Course'.Count -ne 3 -or
    $_.menu.courses.'Third Course'.Count -ne 3
})

Write-Host "Restaurants to retry: $($problems.Count)"
Write-Host "Starting with 3-second delay between requests..."
Write-Host ""

$i = 0
foreach ($r in $problems) {
    $i++
    $slug = $r.slug
    $curr1 = $r.menu.courses.'First Course'.Count
    $curr2 = $r.menu.courses.'Second Course'.Count
    $curr3 = $r.menu.courses.'Third Course'.Count
    $currTotal = $curr1 + $curr2 + $curr3
    Write-Host "[$i/$($problems.Count)] $slug (currently $curr1/$curr2/$curr3)"

    # Best snapshot so far; a snapshot must beat this total to replace it.
    $bestResult = $null
    $bestTotal = $currTotal

    foreach ($ts in $timestamps) {
        $url = "https://web.archive.org/web/$ts/https://inlanderrestaurantweek.com/project/$slug/"
        Write-Host "  Trying $ts..." -NoNewline
        try {
            $result = Fetch-And-Parse $url
            $t = $result.first.Count + $result.second.Count + $result.third.Count
            Write-Host " $($result.first.Count)/$($result.second.Count)/$($result.third.Count)"
            if ($t -gt $bestTotal) {
                $bestTotal = $t
                $bestResult = $result
                if ($result.first.Count -ge 3 -and $result.second.Count -ge 3 -and $result.third.Count -ge 3) {
                    break  # Perfect - no need to try more timestamps
                }
            }
        } catch {
            Write-Host " FAIL: $_"
        }
        # Generous pause between Wayback requests to stay under the rate limit.
        Start-Sleep -Milliseconds 3000
    }

    if ($bestResult -and $bestTotal -gt $currTotal) {
        Write-Host "  -> Updating: $($bestResult.first.Count)/$($bestResult.second.Count)/$($bestResult.third.Count)"
        $r.menu.courses.'First Course' = @($bestResult.first)
        $r.menu.courses.'Second Course' = @($bestResult.second)
        $r.menu.courses.'Third Course' = @($bestResult.third)
        # Contact details are only filled in, never overwritten.
        if ($bestResult.hours -and -not $r.menu.hours) { $r.menu.hours = $bestResult.hours }
        if ($bestResult.phone -and -not $r.menu.phone) { $r.menu.phone = $bestResult.phone }
    } else {
        Write-Host "  -> No improvement"
    }
    Start-Sleep -Milliseconds 2000
}
|
||||
|
||||
# ---- Final report: list every restaurant that is still not 3/3/3, then save ----
Write-Host ""
Write-Host "=== Final Status ==="

# @() keeps .Count correct when zero or one restaurant is left.
$remaining = @($data | Where-Object {
    $_.menu.courses.'First Course'.Count -ne 3 -or
    $_.menu.courses.'Second Course'.Count -ne 3 -or
    $_.menu.courses.'Third Course'.Count -ne 3
})
Write-Host "Still incomplete: $($remaining.Count)"

foreach ($r in $remaining) {
    # The loop variable is $r; $_ is not defined inside a foreach statement,
    # so the counts are read from $r only.
    $c1 = $r.menu.courses.'First Course'.Count
    $c2 = $r.menu.courses.'Second Course'.Count
    $c3 = $r.menu.courses.'Third Course'.Count
    Write-Host "  $($r.slug): $c1/$c2/$c3"
}

# Write UTF-8 without a BOM; [System.Text.Encoding]::UTF8 would prepend one.
$json = $data | ConvertTo-Json -Depth 10
$utf8NoBom = New-Object System.Text.UTF8Encoding $false
[System.IO.File]::WriteAllText($jsonPath, $json, $utf8NoBom)
Write-Host ""
Write-Host "Saved to $jsonPath"
|
||||
227
scrape-2025.ps1
227
scrape-2025.ps1
@@ -1,227 +0,0 @@
|
||||
# scrape-2025.ps1 - Scrapes 2025 Inlander Restaurant Week menus from Wayback Machine
|
||||
# Run from local path (UNC paths block PS execution)
|
||||
|
||||
$slugs = @(
|
||||
"1898", "24taps", "315cuisine", "ambrosia", "anthonys", "arrowhead", "baba",
|
||||
"backyardpublichouse", "bangkokthai", "bardenay", "barkrescuepub", "beverlys",
|
||||
"blackpearl", "borracho", "burgerdock", "cascadia", "cedars", "centennial",
|
||||
"chaps", "chinook", "chowderhead", "clinkerdagger", "cochinito", "collectivekitchen",
|
||||
"dassteinhaus", "deleons", "deleonstexmex", "dockside", "downriver", "dryfly",
|
||||
"durkins", "east", "emrys", "feastworldkitchen", "flameandcork", "flatstick",
|
||||
"flyinggoat", "fortheloveofgod", "francaise", "ganderryegrass", "gardenparty",
|
||||
"gildedunicorn", "hang10", "heritage", "hogwash", "honey", "hulapot",
|
||||
"indiahouse", "indicana", "inlandpacifickitchen", "irongoat", "ironwoodice",
|
||||
"karma", "kasa", "kismet", "kunisthai", "latahbistro", "lebanon", "legendsoffire",
|
||||
"littledragon", "littlenoodle", "longhornbbq", "loren", "lumberbeard",
|
||||
"macdaddys", "mackenzieriver", "mammamias", "mangotree", "maryhill",
|
||||
"masselowslounge", "max", "meltingpot", "mortys", "northhill", "odohertys",
|
||||
"osprey", "outsider", "palmcourtgrill", "ponderosa", "purenorthwest",
|
||||
"purgatory", "qqsushi", "redtail", "republickitchen", "republicpi", "rut",
|
||||
"safariroom", "saranac", "satay", "sauced", "screamingyak", "seasons",
|
||||
"shawnodonnells", "shelbys", "skewers", "southhillgrill", "southperrylantern",
|
||||
"spencers", "steamplant", "steelhead", "stylus", "sweetlous", "swinglounge",
|
||||
"table13", "tavolata", "terraza", "thaibamboo", "thedambar", "titos",
|
||||
"tomatostreet", "tonysonthelake", "torratea", "truelegends", "twigs",
|
||||
"uprise", "vaqueros", "vicinopizza", "victoryburger", "vieuxcarre",
|
||||
"vineolive", "wileys"
|
||||
)
|
||||
|
||||
$areaMap = [ordered]@{
|
||||
"AIRWAY HEIGHTS" = "Airway Heights"
|
||||
"ATHOL" = "Athol"
|
||||
"COEUR D'ALENE" = "Coeur d'Alene"
|
||||
"POST FALLS" = "Post Falls"
|
||||
"HAYDEN" = "Hayden"
|
||||
"LIBERTY LAKE" = "Liberty Lake"
|
||||
"NORTH SPOKANE" = "North Spokane"
|
||||
"SOUTH SPOKANE" = "South Spokane"
|
||||
"SPOKANE VALLEY" = "Spokane Valley"
|
||||
"WEST SPOKANE" = "West Spokane"
|
||||
"WORLEY" = "Worley"
|
||||
"DOWNTOWN" = "Downtown"
|
||||
}
|
||||
|
||||
function Get-CleanText($rawHtml) {
    # Reduces an HTML fragment to plain single-line text: tags become spaces,
    # HTML entities are decoded, en/em dashes become '-', whitespace runs are
    # collapsed to one space and the result is trimmed.
    $t = $rawHtml -replace '<[^>]+>', ' '

    # Decode after tag stripping so escaped markup ("&lt;b&gt;") stays as text.
    # HtmlDecode handles all named and numeric entities in one pass, which also
    # avoids turning "&amp;lt;" into "<".
    $t = [System.Net.WebUtility]::HtmlDecode($t)

    # U+2013 (en dash) and U+2014 (em dash), written as code points so the
    # result does not depend on how this script file is encoded.
    $t = $t.Replace([string][char]0x2013, '-').Replace([string][char]0x2014, '-')

    # \s includes the non-breaking space that &nbsp; decodes to.
    $t = $t -replace '\s+', ' '
    $t.Trim()
}
|
||||
|
||||
function Extract-Dishes($courseHtml) {
    # Collects the dishes of one course from its HTML fragment. Each <p>
    # containing a <strong> with an acceptable name becomes one
    # { name; desc } object. The ArrayList is returned with a leading comma
    # so PowerShell does not unroll it.
    $found = [System.Collections.ArrayList]@()
    $singleLine = [System.Text.RegularExpressions.RegexOptions]::Singleline

    foreach ($paragraph in [regex]::Matches($courseHtml, '<p[^>]*>(.*?)</p>', $singleLine)) {
        $inner = $paragraph.Groups[1].Value
        if ($inner -match '<strong>') {
            # First <strong> = dish name
            $strong = [regex]::Match($inner, '<strong>(.*?)</strong>', $singleLine)
            if ($strong.Success) {
                $title = Get-CleanText $strong.Groups[1].Value

                # Skip dietary-only names, legend lines like "GF:", and
                # names outside the 3..80 character range.
                $isDietaryOnly = $title -match '^(GF|GFA|V\+?|DF|V:|2025)$'
                $isLegendLine = $title -match '^[A-Z]{1,3}:'
                $lengthOk = ($title.Length -ge 3) -and ($title.Length -le 80)

                if ((-not $isDietaryOnly) -and (-not $isLegendLine) -and $lengthOk) {
                    # Description: everything after first <br/>, else everything after </strong>
                    $rawDescription = ''
                    if ($inner -match '(?s)<br\s*/?>(.*?)$') {
                        $rawDescription = $matches[1]
                    }
                    else {
                        $tail = [regex]::Match($inner, '(?s)</strong>(.*?)$', $singleLine)
                        if ($tail.Success) { $rawDescription = $tail.Groups[1].Value }
                    }

                    $dish = [PSCustomObject]@{ name = $title; desc = (Get-CleanText $rawDescription) }
                    $null = $found.Add($dish)
                }
            }
        }
    }

    return ,$found
}
|
||||
|
||||
function Extract-CourseBlock($html, $courseLabel, $nextLabel) {
    # Returns the HTML of one course section, or '' when nothing matches.
    $singleLine = [System.Text.RegularExpressions.RegexOptions]::Singleline

    # Strategy 1: everything between the course label and the next course label.
    if ($nextLabel) {
        $untilNext = [regex]::Escape($courseLabel) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')'
        $section = [regex]::Match($html, $untilNext, $singleLine)
        if ($section.Success) {
            return $section.Groups[1].Value
        }
    }

    # Strategy 2: the first non-heading et_pb_text_inner block that follows
    # the label, looked for within the next 6000 characters only.
    $labelPos = $html.IndexOf($courseLabel)
    if ($labelPos -lt 0) { return '' }

    $slice = $html.Substring($labelPos, [Math]::Min(6000, $html.Length - $labelPos))
    $textBlock = [regex]::Match($slice, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $singleLine)
    if (-not $textBlock.Success) { return '' }

    return $textBlock.Groups[1].Value
}
|
||||
|
||||
function New-RestaurantRecord($name, $slug, $price, $areas, $cuisine, $hours, $phone, $first, $second, $third) {
    # Builds one restaurant entry in the shape written to the JSON file.
    # Shared by the success and error paths so both always emit the same schema.
    [PSCustomObject]@{
        name    = $name
        slug    = $slug
        price   = $price
        areas   = $areas
        cuisine = $cuisine
        url     = "https://inlanderrestaurantweek.com/project/$slug/"
        menu    = [PSCustomObject]@{
            hours   = $hours
            phone   = $phone
            courses = [PSCustomObject]@{
                'First Course'  = @($first)
                'Second Course' = @($second)
                'Third Course'  = @($third)
            }
        }
    }
}

$restaurants = [System.Collections.ArrayList]@()
$total = $slugs.Count
$i = 0

# Fetch the archived page of every slug, extract its metadata and three
# courses, and collect one record per restaurant. A failed fetch still yields
# a placeholder record (hours = 'FETCH_ERROR') so the slug is not lost.
foreach ($slug in $slugs) {
    $i++
    Write-Host "[$i/$total] Fetching: $slug" -NoNewline

    $url = "https://web.archive.org/web/20250306132630/https://inlanderrestaurantweek.com/project/$slug/"

    try {
        $response = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 60 -ErrorAction Stop
        # Read content as bytes then decode as UTF-8 to preserve special chars
        $bytes = $response.RawContentStream.ToArray()
        $html = [System.Text.Encoding]::UTF8.GetString($bytes)

        # --- Name (falls back to the slug when the <title> does not match) ---
        $nameM = [regex]::Match($html, '<title>(.+?) \| Inlander')
        $name = if ($nameM.Success) { $nameM.Groups[1].Value.Trim() } else { $slug }

        # --- Price (from <strong>$45</strong> in an h1); 0 means "not found" ---
        $priceM = [regex]::Match($html, '<strong>\$(\d+)</strong>')
        $price = if ($priceM.Success) { [int]$priceM.Groups[1].Value } else { 0 }

        # --- Cuisine (title-cased) ---
        $cuisineM = [regex]::Match($html, 'CUISINE:\s*([A-Z][A-Za-z/ ]+?)(?:\s*</|\s*<)')
        $cuisine = ''
        if ($cuisineM.Success) {
            $c = $cuisineM.Groups[1].Value.Trim()
            $cuisine = (Get-Culture).TextInfo.ToTitleCase($c.ToLower())
        }

        # --- Phone ---
        $phoneM = [regex]::Match($html, '\((?:208|509)\) \d{3}-\d{4}')
        $phone = if ($phoneM.Success) { $phoneM.Value } else { '' }

        # --- Area(s): every known area name that appears anywhere in the page ---
        $areas = [System.Collections.ArrayList]@()
        $htmlUpper = $html.ToUpper()
        foreach ($aKey in $areaMap.Keys) {
            if ($htmlUpper.Contains($aKey)) {
                $null = $areas.Add($areaMap[$aKey])
            }
        }
        $areas = @($areas | Select-Object -Unique)
        if ($areas.Count -eq 0) { $areas = @('Downtown') }

        # --- Hours ---
        $hoursM = [regex]::Match($html, 'Menu served [^<]+')
        $hours = if ($hoursM.Success) { $hoursM.Value.Trim() } else { '' }

        # --- Menu Courses ---
        $firstCourse  = Extract-Dishes (Extract-CourseBlock $html 'First Course' 'Second Course')
        $secondCourse = Extract-Dishes (Extract-CourseBlock $html 'Second Course' 'Third Course')
        $thirdCourse  = Extract-Dishes (Extract-CourseBlock $html 'Third Course' $null)

        Write-Host " -> $name [$price] $($firstCourse.Count)/$($secondCourse.Count)/$($thirdCourse.Count) courses"

        $record = New-RestaurantRecord $name $slug $price $areas $cuisine $hours $phone $firstCourse $secondCourse $thirdCourse
        $null = $restaurants.Add($record)

    } catch {
        Write-Host " ERROR: $_"
        $placeholder = New-RestaurantRecord $slug $slug 0 @('Downtown') '' 'FETCH_ERROR' '' @() @() @()
        $null = $restaurants.Add($placeholder)
    }

    Start-Sleep -Milliseconds 500
}

$outPath = 'C:\Users\derekc.CHNSLocal\AppData\Local\Temp\2025-restaurants.json'
$json = $restaurants | ConvertTo-Json -Depth 10
# Write UTF-8 without a BOM; [System.Text.Encoding]::UTF8 would prepend one.
$utf8NoBom = New-Object System.Text.UTF8Encoding $false
[System.IO.File]::WriteAllText($outPath, $json, $utf8NoBom)
Write-Host ""
Write-Host "Done! Saved $($restaurants.Count) restaurants to $outPath"
|
||||
119
test-parse.ps1
119
test-parse.ps1
@@ -1,119 +0,0 @@
|
||||
# Test parsing on local 315cuisine HTML
|
||||
|
||||
$html = [System.IO.File]::ReadAllText('C:\Users\DEREKC~1.CHN\AppData\Local\Temp\test-restaurant.html', [System.Text.Encoding]::UTF8)
|
||||
|
||||
$areaMap = [ordered]@{
|
||||
"AIRWAY HEIGHTS" = "Airway Heights"
|
||||
"ATHOL" = "Athol"
|
||||
"COEUR D'ALENE" = "Coeur d'Alene"
|
||||
"POST FALLS" = "Post Falls"
|
||||
"HAYDEN" = "Hayden"
|
||||
"LIBERTY LAKE" = "Liberty Lake"
|
||||
"NORTH SPOKANE" = "North Spokane"
|
||||
"SOUTH SPOKANE" = "South Spokane"
|
||||
"SPOKANE VALLEY" = "Spokane Valley"
|
||||
"WEST SPOKANE" = "West Spokane"
|
||||
"WORLEY" = "Worley"
|
||||
"DOWNTOWN" = "Downtown"
|
||||
}
|
||||
|
||||
function Get-CleanText($rawHtml) {
    # Strips tags from an HTML fragment, decodes its HTML entities, collapses
    # whitespace runs to a single space and trims the result.
    $t = $rawHtml -replace '<[^>]+>', ' '

    # Single-pass decoding of all named/numeric entities (&amp;, &lt;, &gt;,
    # &quot;, &#39;, &nbsp; ...). Done after tag stripping so escaped markup
    # is kept as literal text.
    $t = [System.Net.WebUtility]::HtmlDecode($t)

    # \s also covers the non-breaking space that &nbsp; decodes to.
    $t = $t -replace '\s+', ' '
    $t.Trim()
}
|
||||
|
||||
function Extract-Dishes($courseHtml) {
    # Extracts the dishes of one course from its HTML fragment. Each <p> whose
    # first <strong> holds an acceptable name yields one { name; desc } object.
    # Returns an ArrayList (comma-wrapped so it is not unrolled).
    $dishes = [System.Collections.ArrayList]@()
    $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline

    $pMatches = [regex]::Matches($courseHtml, '<p[^>]*>(.*?)</p>', $opts)

    foreach ($pm in $pMatches) {
        $pContent = $pm.Groups[1].Value

        # First <strong> = dish name
        $nameM = [regex]::Match($pContent, '<strong>(.*?)</strong>', $opts)
        if (-not $nameM.Success) { continue }
        $name = Get-CleanText $nameM.Groups[1].Value

        # Skip bare dietary tags, legend lines ("GF: ...") and names outside 3..80 chars.
        if ($name -match '^(GF|GFA|V\+?|DF|V:)$') { continue }
        if ($name.Length -lt 3 -or $name.Length -gt 80) { continue }
        if ($name -match '^[A-Z]{1,3}:') { continue }

        # Description: everything after the first <br>. The (?s) flag is
        # required because -match does not use Singleline by default; without
        # it a description spanning several lines never matched, and the
        # fallback below then pulled in any tags sitting between </strong>
        # and the <br> (e.g. a trailing "GF").
        $afterBr = ''
        if ($pContent -match '(?s)<br\s*/?>(.*?)$') {
            $afterBr = $matches[1]
        } else {
            $afterStrong = [regex]::Match($pContent, '</strong>(.*?)$', $opts)
            if ($afterStrong.Success) { $afterBr = $afterStrong.Groups[1].Value }
        }

        $desc = Get-CleanText $afterBr
        $null = $dishes.Add([PSCustomObject]@{ name = $name; desc = $desc })
    }

    return ,$dishes
}
|
||||
|
||||
function Extract-CourseBlock($html, $courseLabel) {
    <#
    .SYNOPSIS
    Returns the inner HTML of the text module that follows a course heading.

    .DESCRIPTION
    Primary strategy: find $courseLabel, then the next 'et_pb_text_inner">'
    within 300 characters, and capture up to (not including) the next heading,
    text module, vegetarian legend or inner row marker.
    Fallback: take up to 4000 characters starting at the label and capture the
    first 'et_pb_text_inner">' body found there.

    .PARAMETER html
    Full page HTML. Empty or $null yields ''.

    .PARAMETER courseLabel
    Literal heading text to look for, e.g. 'First Course'.

    .OUTPUTS
    System.String - the captured HTML, or '' when nothing matched.
    #>
    if ([string]::IsNullOrEmpty($html)) { return '' }

    $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline

    # The label is escaped so it is always matched literally.
    $pattern = [regex]::Escape($courseLabel) + '.{0,300}?et_pb_text_inner">(.+?)(?=<h[123]|et_pb_text_inner"><h|V:\s*<|Vegetarian item|et_pb_row_inner_[23])'
    $m = [regex]::Match($html, $pattern, $opts)
    if ($m.Success) { return $m.Groups[1].Value }

    # Fallback. Ordinal search keeps this consistent with the regex above and
    # avoids culture-sensitive matching surprises on markup text.
    $idx = $html.IndexOf($courseLabel, [System.StringComparison]::Ordinal)
    if ($idx -ge 0) {
        # Bound the window so an unrelated module far down the page is not picked up.
        $sub = $html.Substring($idx, [Math]::Min(4000, $html.Length - $idx))
        $innerM = [regex]::Match($sub, 'et_pb_text_inner">(.*?)(?=et_pb_text_inner|</div></div></div>)', $opts)
        if ($innerM.Success) { return $innerM.Groups[1].Value }
    }

    return ''
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Diagnostic dump: parse one restaurant page held in $html and print every
# extracted field so the parsing rules can be checked by eye.
# Relies on $html and $areaMap being set earlier in the script.
# ---------------------------------------------------------------------------

# Restaurant name is the page title up to the " | Inlander" suffix.
$nameM = [regex]::Match($html, '<title>(.+?) \| Inlander')
Write-Host "Name: $($nameM.Groups[1].Value.Trim())"

# Price is the first bold dollar amount on the page.
$priceM = [regex]::Match($html, '<strong>\$(\d+)</strong>')
Write-Host "Price: $($priceM.Groups[1].Value)"

# Cuisine appears upper-cased after "CUISINE:"; normalise to Title Case.
# ToTitleCase leaves all-caps words alone, hence the ToLower() first.
$cuisineM = [regex]::Match($html, 'CUISINE:\s*([A-Z][A-Za-z /]+?)(?:\s*</|\s*<)')
$cuisine = (Get-Culture).TextInfo.ToTitleCase($cuisineM.Groups[1].Value.Trim().ToLower())
Write-Host "Cuisine: $cuisine"

# Only the 208 and 509 area codes are recognised.
$phoneM = [regex]::Match($html, '\((?:208|509)\) \d{3}-\d{4}')
Write-Host "Phone: $($phoneM.Value)"

$hoursM = [regex]::Match($html, 'Menu served [^<]+')
Write-Host "Hours: $($hoursM.Value.Trim())"

# Upper-case the page once (not once per key) and do it culture-invariantly so
# keys such as "LIBERTY LAKE" still match under cultures with special casing
# rules for 'i'.
$htmlUpper = $html.ToUpperInvariant()
$areas = @(
    foreach ($aKey in $areaMap.Keys) {
        if ($htmlUpper.Contains($aKey)) { $areaMap[$aKey] }
    }
)
Write-Host "Areas: $($areas -join ', ')"

# Each course is located and printed the same way; only the label differs.
$courseSections = @(
    @{ Heading = '--- FIRST COURSE ---';  Label = 'First Course' },
    @{ Heading = '--- SECOND COURSE ---'; Label = 'Second Course' },
    @{ Heading = '--- THIRD COURSE ---';  Label = 'Third Course' }
)

foreach ($section in $courseSections) {
    Write-Host ""
    Write-Host $section.Heading
    $courseHtml = Extract-CourseBlock $html $section.Label
    $courseDishes = Extract-Dishes $courseHtml
    foreach ($d in $courseDishes) { Write-Host " [$($d.name)] | $($d.desc)" }
}
|
||||
Reference in New Issue
Block a user