diff --git a/2025-restaurants.json b/2025-restaurants.json index b3228fb..6dd683f 100644 --- a/2025-restaurants.json +++ b/2025-restaurants.json @@ -14,15 +14,45 @@ "courses": { "First Course": [ { - "name": "Quesadilla", - "desc": "Braised short rib, pepperjack cheese, black bean corn salsa, chipotle crema, queso fresco, pico de gallo" + "name": "Short Rib Quesadilla", + "desc": "Braised short rib, pepperjack cheese, black bean corn salsa, chipotle crema, queso fresco, pico de gallo" + }, + { + "name": "Ahi Tuna Crudo", + "desc": "Cucumber mignonette, shallots, garlic oil, wonton crisps, watermelon radishes" + }, + { + "name": "Kale and Brussels Sprouts Salad", + "desc": "Chopped kale, shredded Brussels sprouts, grated" } ], "Second Course": [ - + { + "name": "Southwestern Sirloin", + "desc": "Marinated top sirloin, sweet potato hash, chimichurri, fried sweet potato crisps, asparagus GF" + }, + { + "name": "Mediterranean Chicken", + "desc": "Pan-seared French cut chicken breast, saffron rice, squash medley, lemon garlic aioli, tomato-shallot relish, balsamic glaze" + }, + { + "name": "Cajun Jambalaya Pasta", + "desc": "Chicken, andouille sausage, creamy Cajun sauce, okra, penne, garlic bread" + } ], "Third Course": [ - + { + "name": "Strawberry Bliss", + "desc": "Yellow cake stacked with strawberry compote, vanilla custard, basil strawberry coulis, vanilla anglaise" + }, + { + "name": "Chocolate Peanut Butter Cake", + "desc": "Chocolate and peanut butter layer cake, Irish cream chocolate ganache" + }, + { + "name": "Crème Brulée", + "desc": "House-made crème brulée, fresh mixed berries GF" + } ] } } @@ -505,16 +535,49 @@ "url": "https://inlanderrestaurantweek.com/project/bardenay/", "menu": { "hours": "Menu served Sun-Thu, 5-9 pm; Fri-Sat, 5-10 pm", - "phone": "", + "phone": "(208) 765-1540", "courses": { "First Course": [ - + { + "name": "Corn Panna Cotta", + "desc": "Savory roasted corn custard, cornbread, popcorn, avocado crema, tajin, cotija, and chives V" + 
}, + { + "name": "Steak \u0026 Potato Canapés", + "desc": "Charbroiled beef tenderloin, Boursin cheese spread, arugula, pickled red onion, and balsamic glaze on roasted Yukon gold potato medallions GF" + }, + { + "name": "Smoked Trout Dip", + "desc": "Local Idaho red trout cream cheese spread, beet-marinated deviled eggs, capers, candied bacon, lemon zest, and everything bagel crisps" + } ], "Second Course": [ - + { + "name": "Massaman Curry Lamb Shank", + "desc": "Braised spiced lamb shank, carrots, and potatoes in a coconut milk curry over saffron basmati rice GF DF" + }, + { + "name": "Miso Red Snapper", + "desc": "Charbroiled Atlantic red snapper fillet, sweet miso-tamari glaze, chilled soba noodle salad with edamame, water chestnuts, peppers, onions, and sesame seeds DF" + }, + { + "name": "Mushroom Spanakopita", + "desc": "Wild mushroom medley, spinach, fresh herbs, and feta baked in puff pastry with roasted carrot and chickpea Greek salad and tzatziki V" + } ], "Third Course": [ - + { + "name": "Wasabi Mojito Cheesecake", + "desc": "Minty-lime cheesecake with a hint of wasabi, Bardenay rum mojito sauce, whipped cream, and frosted lime zest GF" + }, + { + "name": "Elvis Cake", + "desc": "Peanut butter mousse in a cookie crust with brûléed banana, caramel, candied bacon, whipped cream, and shaved chocolate GF" + }, + { + "name": "Root Beer Float", + "desc": "Not Your Father’s Root Beer over house-spun espresso-cinnamon ice cream. 
Must be 21 to order" + } ] } } @@ -530,16 +593,49 @@ "url": "https://inlanderrestaurantweek.com/project/barkrescuepub/", "menu": { "hours": "Menu served Sun-Thur, 4-9 pm; Fri-Sat, 4-10 pm", - "phone": "", + "phone": "(509) 418-2551", "courses": { "First Course": [ - + { + "name": "Korean Spam Dog", + "desc": "Two spam and mozzarella-stuffed Korean dogs served with a side of kimchi slaw and sriracha aioli" + }, + { + "name": "Crispy Brussels", + "desc": "Crispy Brussels sprouts with parmesan and lemon basil dipping sauce GF V" + }, + { + "name": "Thai Mozzarella Egg Rolls", + "desc": "Mozzarella and Thai peanut sauce egg rolls served with a sriracha dipping sauce" + } ], "Second Course": [ - + { + "name": "Garlic Steak Bites", + "desc": "Garlic parmesan steak bites on top of a potato mash and served with a chimichurri sauce GF" + }, + { + "name": "Tamale with Rojo Pork", + "desc": "Green chili and cheese tamale topped with rojo braised pork GF" + }, + { + "name": "Tofu Schnitzel Sandwich", + "desc": "Ultra-crispy tofu schnitzel sandwich with garlic, mayo, arugula, tomato and a fried egg on a brioche bun V" + } ], "Third Course": [ - + { + "name": "Oreo Icebox Cake", + "desc": "Oreo Icebox Cake V+" + }, + { + "name": "Apple Compote Hand Pie", + "desc": "Apple Compote Hand Pie" + }, + { + "name": "Roasted Pineapple Angel Food Cake", + "desc": "Angel food cake with brown sugar roasted pineapple and a buttercream glaze" + } ] } } @@ -1257,13 +1353,46 @@ "phone": "", "courses": { "First Course": [ - + { + "name": "Signature Salad", + "desc": "Organic baby spinach, brie, bacon, apple and candied walnuts, dressed with pomegranate vinaigrette" + }, + { + "name": "Mac n Cheese", + "desc": "Rich four-cheese sauce, stuffed with cream cheese and topped with gruyere cheese sauce V" + }, + { + "name": "Ahi Poke", + "desc": "Yellow fin tuna, green onion, avocado, crispy tortilla, seaweed and cucumber, topped with tamari sauce" + } ], "Second Course": [ - + { + "name": "Halibut 
Filet", + "desc": "Tender halibut in a buerre blanc sauce on parmesan risotto with seasonal vegetables and Asian cucumbers GF" + }, + { + "name": "Rack of Lamb", + "desc": "Topped with a huckleberry glaze and goat cheese, served with roasted local potatoes and seasonal vegetables GF" + }, + { + "name": "Surf n Turf Burger", + "desc": "Beef filet topped with butter poached lobster and bearnaise sauce, served on a brioche bun with garlic fries and a lemon aioli" + } ], "Third Course": [ - + { + "name": "Blueberry Cobbler", + "desc": "Local blueberries, crispy topping with blueberry sauce, whipped cream and vanilla bean ice cream V" + }, + { + "name": "Flourless Chocolate Cake", + "desc": "Served with a huckleberry glaze and whipped cream GF V" + }, + { + "name": "New York Cheesecake", + "desc": "Creamy classic cheesecake topped with strawberry sauce and whipped cream" + } ] } } @@ -1573,13 +1702,46 @@ "phone": "", "courses": { "First Course": [ - + { + "name": "Burrata Naan", + "desc": "Burrata cheese, baby arugula, spinach pesto, marinara sauce on our house-made naan GFA V" + }, + { + "name": "Whipped Feta and Pita", + "desc": "Creamy whipped feta, pickled garlic, sweet and spicy honey drizzle GFA V" + }, + { + "name": "Scallops and Arugula Salad", + "desc": "Pan-seared scallops, baby arugula, shaved parmesan, pickled shallots, sunflower seeds, microgreens, tossed with a gin vinaigrette GF" + } ], "Second Course": [ - + { + "name": "Bourbon Butter Raviolis", + "desc": "Bourbon butter, shallots, garlic, white wine, heavy cream, wild mushroom stuffed raviolis garnished with parsley and parmesan cheese V" + }, + { + "name": "Cajun Trout", + "desc": "Blackened steelhead trout, sautéed spinach, over garlic Yukon gold mashed potatoes" + }, + { + "name": "Birria Beef Over Polenta", + "desc": "Birria-braised beef in a consommé, chipotle sauce, cojita cheese, cilantro and tomato" + } ], "Third Course": [ - + { + "name": "Waffles and Cream", + "desc": "Sugar pearl waffles 
stuffed with French vanilla bean ice cream, salted bourbon caramel" + }, + { + "name": "Tiramisu", + "desc": "Layers of coffee-soaked ladyfingers, creamy mascarpone, cocoa dusting" + }, + { + "name": "Apple and Cherry Crisp", + "desc": "Gingered apple, sour cherry, cinnamon crumble" + } ] } } @@ -1598,13 +1760,46 @@ "phone": "(509) 863-9501", "courses": { "First Course": [ - + { + "name": "Grilled Octopus Salad", + "desc": "Fire grilled octopus, fresh spinach, organic heirloom tomato, cucumber, green onion, pepitas, sweet and tangy citrus vinaigrette GF" + }, + { + "name": "Acorn Squash Arancini", + "desc": "Risotto, acorn squash, herb seasoned bread crumbs, Parmesan Reggiano, smokey tomato jam, green garnish V" + }, + { + "name": "Braised Beet and Fugi Apple Salad", + "desc": "Crisp Fugi apple, braised red and golden beets, herbed ricotta cheese, horseradish vinaigrette, fresh herbs, candied pecans GF V" + } ], "Second Course": [ - + { + "name": "Bison Meatloaf", + "desc": "Savory bison, ground and mixed with traditional breadcrumbs, egg and fresh herbs, sweet and spicy mixed berry glaze, whipped then baked Parmesan Duchess potatoes, garlic roasted green beans" + }, + { + "name": "Smoked Pork Ribs", + "desc": "Smoked then pan-finished tender pork ribs, tangy smokey chipotle barbecue sauce, crispy broiled sunchokes, sweet corn pureé, house-fried chicharrones GF" + }, + { + "name": "Crab and Butternut Squash Pasta", + "desc": "Lump Alaskan Crab meat, spiraled butternut squash, buttery white wine sauce, smoked artichoke hearts, heirloom cherry tomatoes, Parmesan Reggiano, green onion curls Vegetarian upon request GF V" + } ], "Third Course": [ - + { + "name": "Yuzu and Blonde Chocolate Choux Buns", + "desc": "French Choux pastry baked with a sugary craquelin top, citrusy yuzu pastry cream, white chocolate caramel mousse rosette V" + }, + { + "name": "Pot de Creme au Chocolat", + "desc": "Velvety rich chocolate custard, dulce de leche whipped with heavy cream, candied 
orange GF V" + }, + { + "name": "French Chocolate Cheesecake", + "desc": "Creamy, chocolatey cheesecake, sweet walnut crust, boozy caramel sauce, Chantilly cream V" + } ] } } @@ -2088,13 +2283,46 @@ "phone": "(509) 315-4613", "courses": { "First Course": [ - + { + "name": "Winter Salad", + "desc": "citrus. hazelnut. goat cheese." + }, + { + "name": "Smoked Steelhead", + "desc": "amaranth. roe. almond." + }, + { + "name": "Pork Rillettes", + "desc": "Gander \u0026 Ryegrass bread. olive. caper and raisin chutney." + } ], "Second Course": [ - + { + "name": "Casarecce", + "desc": "pork shoulder ragout. parmesan." + }, + { + "name": "Campanelle", + "desc": "squash. pork belly. pepitas." + }, + { + "name": "Spaghetti", + "desc": "red beef sauce. parmesan." + } ], "Third Course": [ - + { + "name": "Pork Tenderloin", + "desc": "carrot. chicory. hazelnut." + }, + { + "name": "Scallop", + "desc": "cauliflower. mushroom. citrus." + }, + { + "name": "Shortrib", + "desc": "potatoes. kale. root vegetables." + } ] } } @@ -2492,28 +2720,44 @@ "courses": { "First Course": [ { - "name": "GF V", + "name": "Subudana Pakore", "desc": "Subudana (tapioca), potatoes, cumin, and finely chopped fresh herbs, fried as a pakore (fritter) GF V" }, { - "name": "GF V", + "name": "Moong Dal Chaat", "desc": "Moong dal (green lentil) pakore (fritter), served chaat-style (street food snack) topped with sweetened yogurt and chutneys GF V" }, { - "name": "GF V", + "name": "Singhara Aloo Tikki", "desc": "Water chestnut flour, potatoes, cilantro, green chiles, ginger, fresh herbs and spices. Crispy on the outside and deliciously soft inside GF V" } ], "Second Course": [ { - "name": "GF DFA", + "name": "Chicken Methi Mali", + "desc": "Chicken and fenugreek leaves cooked with Indian aromas. Freshly chopped ginger, garlic and fresh yogurt make a thick, creamy sauce. Served with rice and naan GF V+A" + }, + { + "name": "Saag with Goat", "desc": "Goat marinated with yogurt, fresh herbs and Indian aromas. 
Cooked in creamy saag (spinach sauce), served with rice and naan GF DFA" + }, + { + "name": "Methi Matter Mali", + "desc": "Methi (fenugreek) and green peas cooked with fragrant and creamy gravy with spices. Served with rice and naan GF – Coconut milk option / vegetarian option" } ], "Third Course": [ { - "name": "GF V+", + "name": "Carrot Cake Halwa", + "desc": "Fresh carrots roasted in butter, pistachios, cashews, golden raisins, and plenty of ground cardamom" + }, + { + "name": "Paan Ice Cream", "desc": "This refreshing paan (betel leaves) ice cream has all the flavors: rose petal jam, candied fennel, dates, and coconut GF V+" + }, + { + "name": "Gulabjamun with Rabdi", + "desc": "Indian donuts served with delicious, creamy, and rich rabdi (pistachio and cashew sauce) V" } ] } @@ -2766,28 +3010,44 @@ "courses": { "First Course": [ { - "name": "GF V", + "name": "Subudana Pakore", "desc": "Subudana (tapioca), potatoes, cumin, and finely chopped fresh herbs, fried as a pakore (fritter) GF V" }, { - "name": "GF V", + "name": "Moong Dal Chaat", "desc": "Moong dal (green lentil) pakore (fritter), served chaat-style (street food snack) topped with sweetened yogurt and chutneys GF V" }, { - "name": "GF V", + "name": "Singhara Aloo Tikki", "desc": "Water chestnut flour, potatoes, cilantro, green chiles, ginger, fresh herbs and spices. Crispy on the outside and deliciously soft inside GF V" } ], "Second Course": [ { - "name": "GF DFA", + "name": "Chicken Methi Mali", + "desc": "Chicken and fenugreek leaves cooked with Indian aromas. Freshly chopped ginger, garlic and fresh yogurt make a thick, creamy sauce. Served with rice and naan GF V+A" + }, + { + "name": "Saag with Goat", "desc": "Goat marinated with yogurt, fresh herbs and Indian aromas. Cooked in creamy saag (spinach sauce), served with rice and naan GF DFA" + }, + { + "name": "Methi Matter Mali", + "desc": "Methi (fenugreek) and green peas cooked with fragrant and creamy gravy with spices. 
Served with rice and naan GF – Coconut milk option / vegetarian option" } ], "Third Course": [ { - "name": "GF V+", + "name": "Carrot Cake Halwa", + "desc": "Fresh carrots roasted in butter, pistachios, cashews, golden raisins, and plenty of ground cardamom" + }, + { + "name": "Paan Ice Cream", "desc": "This refreshing paan (betel leaves) ice cream has all the flavors: rose petal jam, candied fennel, dates, and coconut GF V+" + }, + { + "name": "Gulabjamun with Rabdi", + "desc": "Indian donuts served with delicious, creamy, and rich rabdi (pistachio and cashew sauce) V" } ] } @@ -3007,31 +3267,43 @@ "courses": { "First Course": [ { - "name": "GFA V V+", + "name": "Hummus Dip with Pita", "desc": "Slow-cooked garbanzo beans blended with tahini, lemon and garlic, topped with extra virgin olive oil and served with pita GFA V V+ – Add beef shawarma meat $14" }, { - "name": "GFA V", + "name": "Tzatziki Dip with Pita", "desc": "Fresh Greek yogurt, cucumber, fresh dill, mint and garlic, served with pita GFA V – Add lamb kofta skewer $9" }, { - "name": "GF V V+", + "name": "Vegetarian Grape Leaves with Tzatziki", "desc": "Six handmade rolls stuffed with seasoned rice and vegetables, slow-cooked in tomato broth and olive oil, served with tzatziki sauce GF V V+ – Add gyro meat $12" } ], "Second Course": [ { - "name": "GF V V+", + "name": "Chicken Shish Kebab Platter", + "desc": "Two skewers of marinated tenderloin chunks grilled on an open flame, served over saffron turmeric basmati rice with a side of Mediterranean salad and garlic sauce GF – Upgrade side to tabouleh $6" + }, + { + "name": "Falafel Dinner Platter", "desc": "Deep fried falafel patties served over saffron turmeric basmati rice with a side of Mediterranean salad and our tahini sauce GF V V+ – Upgrade side to lentil soup $8" + }, + { + "name": "Gyros Greek Platter", + "desc": "Grilled beef and lamb strips served over saffron turmeric basmati rice with a side of Mediterranean salad and our tahini sauce DF – 
Upgrade side to Greek salad $6" } ], "Third Course": [ { - "name": "GF V", + "name": "Lebanese Rice Pudding", "desc": "Creamy rice pudding flavored with orange blossom and rose water GF V – Add ice cream scoop $5" }, { - "name": "GF V V+", + "name": "Namoura (Semolina Cake)", + "desc": "Popular classic Middle Eastern dessert made with semolina flour and topped with a sweet sugar syrup – Add nuts and honey $5" + }, + { + "name": "Halva", "desc": "A Middle Eastern treat made from tahini GF V V+ – Add pita $2" } ] @@ -3628,13 +3900,46 @@ "phone": "", "courses": { "First Course": [ - + { + "name": "EFESTĒ Feral Sauvignon Blanc", + "desc": "White peach, lime leaf, flint" + }, + { + "name": "No-Li Porch Glow Amber", + "desc": "Crisp and refreshing with a hint of chocolate malt" + }, + { + "name": "Maple New Fashioned", + "desc": "Browne Family whiskey, barrel-aged maple syrup, Amarena cherry juice, Peychaud’s bitters" + } ], "Second Course": [ - + { + "name": "Double Smash Burger", + "desc": "Two quarter-pound Prime beef patties, American cheese, caramelized onion, aioli and Thousand Island on a brioche bun, served with french fries" + }, + { + "name": "8 oz. 
New York", + "desc": "Russet purée, seasonal vegetable, brandy peppercorn demi-glace" + }, + { + "name": "Pan-Seared Salmon", + "desc": "Champagne vinaigrette orzo, Brussels sprouts, bacon, mustard beurre blanc" + } ], "Third Course": [ - + { + "name": "Crème Brûlée", + "desc": "Baked coconut vanilla custard, caramelized sugar" + }, + { + "name": "Cranberry Orange Chiffon", + "desc": "White chocolate chiffon cake, orange white chocolate mousse, cranberry gelée" + }, + { + "name": "Strawberry Rhubarb Sorbet", + "desc": "Fresh berries, mint" + } ] } } @@ -3944,13 +4249,46 @@ "phone": "(509) 323-2578", "courses": { "First Course": [ - + { + "name": "Burrata Caprese", + "desc": "Heirloom tomato, red pesto, basil, grilled baguette V" + }, + { + "name": "Roasted Beet Salad", + "desc": "Roasted beets and watercress, pancetta, chevre, lemon vinaigrette GF" + }, + { + "name": "Baby Kale Caesar", + "desc": "Baby kale, garlic crouton, aged Parmesan Add chicken $5 or shrimp $8" + } ], "Second Course": [ - + { + "name": "Risotto with Roasted Butternut Squash", + "desc": "Risotto, roasted butternut squash, sage, smoked gouda and mascarpone" + }, + { + "name": "Carleton Farms Pork Loin", + "desc": "Pork loin brined and lightly smoked, baby kale, garlic chips, gruyere mashed potato, honey-apricot gastrique" + }, + { + "name": "Flatiron Steak Frites Wild Mushrooms", + "desc": "Flatiron steak frites wild mushrooms, roasted leeks, 10-year aged balsamic, truffle fries" + } ], "Third Course": [ - + { + "name": "Peach Bread Pudding", + "desc": "Fig jam, vanilla crème anglaise" + }, + { + "name": "Coconut Panna Cotta", + "desc": "Coconut panna cotta, raspberries, lemon curd, Chantilly cream" + }, + { + "name": "Crème Brulee", + "desc": "" + } ] } } @@ -4085,15 +4423,45 @@ "phone": "(509) 934-1979", "courses": { "First Course": [ - + { + "name": "Steak Bites", + "desc": "Steak bites marinated in house-made signature steak sauce. 
Served with cheesy garlic bread" + }, + { + "name": "Caesar Salad", + "desc": "Romaine, house-made Caesar dressing, croutons and grated parmesan cheese GFA – Add chicken $6" + }, + { + "name": "Garlic Cheese Curds", + "desc": "Garlic breaded Wisconsin white cheddar cheese curds with Ponderosa boom-boom dipping sauce V" + } ], "Second Course": [ - + { + "name": "Chicken Fried Steak", + "desc": "12 oz. chicken fried steak served with vegetable medley and choice of potatoes" + }, + { + "name": "Bleu Cheese \u0026 Pecan Salmon", + "desc": "Pecan and bleu cheese crusted grilled salmon. Served with white rice and vegetable medley GFA" + }, + { + "name": "Bourbon Chicken", + "desc": "Grilled chicken breast topped with mushroom and onion bourbon sauce. Served with mashed potatoes and vegetable medley" + } ], "Third Course": [ { - "name": "GFA V", + "name": "Brownie Skillet", + "desc": "Warm brownie in a cast iron skillet topped with vanilla ice cream V" + }, + { + "name": "Creme Brulee", "desc": "House-made creme brulee GFA V" + }, + { + "name": "Lemon Cookie Sandwich", + "desc": "Lemon and white chocolate chip cookie filled with vanilla ice cream and house-made bourbon caramel sauce V" } ] } @@ -4735,13 +5103,46 @@ "phone": "(509) 326-7251", "courses": { "First Course": [ - + { + "name": "Celtic Caesar Salad", + "desc": "Chopped romaine, baby kale, shaved Parmesan, house-made croutons" + }, + { + "name": "Donegal Bay Clam Chowder", + "desc": "Creamy clam chowder, chopped red pepper, cabbage, onion, potato" + }, + { + "name": "Boxty Cakes", + "desc": "Two breaded potato cakes with corned beef and Dubliner cheese, deep fried" + } ], "Second Course": [ - + { + "name": "Corned Beef and Cabbage", + "desc": "Our signature dish! 
Slow-cooked, tender corned beef, braised cabbage, colcannon potatoes, creamy horseradish" + }, + { + "name": "Guinness Beef Stew", + "desc": "Guinness-braised Kobe beef cubes, rustic-cut carrots, parsnips, celery, pearl onions with Colcannon-style mashed potatoes on top" + }, + { + "name": "Fish and Chips", + "desc": "Two pieces of wild-caught, sustainable Pacific Cod, hand-breaded in crispy panko and seasonings. Comes with French fries and scratch-made tartar sauce" + } ], "Third Course": [ - + { + "name": "Irish Bread Pudding", + "desc": "Scratch-made and topped with a buttered rum sauce and currants" + }, + { + "name": "Bailey’s Creme Brulee", + "desc": "Bailey’s custard with a caramelized sugar topping and a shortbread shamrock cookie" + }, + { + "name": "Danny Boy’s Chocolate Brownie", + "desc": "Scratch-made chocolate brownie topped with candied pecans and Andes mint crumbles" + } ] } } @@ -5343,13 +5744,46 @@ "phone": "(509) 598-4300", "courses": { "First Course": [ - + { + "name": "Burrata Cheese and Local Beets", + "desc": "Arugula, heirloom tomatoes, aged balsamic reduction GF, V" + }, + { + "name": "Grapefruit Salad", + "desc": "Bibb lettuce, goat cheese, avocado, mandarin oranges, mint-lime vinaigrette GF, V" + }, + { + "name": "Smoky Butternut Squash Bisque", + "desc": "Chipotle pepitas, cilantro GF, V" + } ], "Second Course": [ - + { + "name": "Grilled Bone-In Pork Chop", + "desc": "Smoked apple puree, mashed potatoes, local vegetables GF" + }, + { + "name": "Blueberry Duck Breast", + "desc": "Blueberry gastrique, fingerling potatoes, local vegetables GF" + }, + { + "name": "Pan Seared Sea Scallops", + "desc": "Lemon-tarragon risotto, brown butter, local vegetables GF" + } ], "Third Course": [ - + { + "name": "Rolo Dome", + "desc": "Chocolate mousse, salted caramel GF, V" + }, + { + "name": "Blood Orange Cheesecake", + "desc": "Vanilla cheesecake, blood orange gelee V" + }, + { + "name": "Crème Brûlée", + "desc": "Housemade vanilla custard GF, V" + } 
] } } @@ -5415,13 +5849,46 @@ "phone": "(208) 758-0111", "courses": { "First Course": [ - + { + "name": "Tequila Clams", + "desc": "Reposado tequila, Argentinian chorizo, clams, herbs, lime, grilled sourdough GFA Double portion size – $8" + }, + { + "name": "Half Salad", + "desc": "Half size portion of any salad: Quinoa Greens V+ , Jicama Citrus V , Warmed Spinach, Roasted Beet V , Cezar GFA Add your choice of protein – $6, Full size salad – $6" + }, + { + "name": "Elote Cakes", + "desc": "Roasted corn, masa, queso fresco, cilantro, crema, pickled Fresno chilis GF V" + } ], "Second Course": [ - + { + "name": "Churrasco Steak", + "desc": "Argentinian-style grilled skirt steak, chimichurri, mashed sweet potatoes with coconut milk GF Add grilled shrimp – $8" + }, + { + "name": "Arroz con Gandules", + "desc": "Honduran-style rice dish with pork, pigeon peas, vegetables and spices GF" + }, + { + "name": "Chicken or Vegetable Tamale", + "desc": "Slow cooked pulled chicken or spiced roasted vegetable blend (V), stuffed inside our banana leaf-wrapped Oaxacan style tamales. Gallo pinto and dressed jicama slaw GF Add a second Tamale – $10" + } ], "Third Course": [ - + { + "name": "Arroz con Leche Flan", + "desc": "Latin-style spiced rice pudding composed within a velvety custard of a flan GF V" + }, + { + "name": "Chocolate Torte", + "desc": "Flourless chocolate torte, pink peppercorn goat’s milk panna cotta, burnt candied orange peel, masa crumble GF V" + }, + { + "name": "Alfajores y Cafe", + "desc": "Latin shortbread cookies served with our signature coffee blend from Coeur d’Alene Coffee Company V Add rum (to your coffee) $8" + } ] } } @@ -6108,13 +6575,46 @@ "phone": "", "courses": { "First Course": [ - + { + "name": "Boudin Balls", + "desc": "Louisiana’s version of a snacking sausage. 
Served with a jalapeño remoulade" + }, + { + "name": "Whipped Honey Cornbread", + "desc": "Cast-iron seared, whipped honey butter, scallions V" + }, + { + "name": "Louisiana Garlic Soup", + "desc": "Creamy roasted garlic soup. A Louisiana tradition!" + } ], "Second Course": [ - + { + "name": "Duck Gumbo", + "desc": "Duck and Andouille sausage gumbo" + }, + { + "name": "Trout Meuniere", + "desc": "Steelhead trout with a brown butter sauce" + }, + { + "name": "Pork Grillades", + "desc": "Slow-roasted pork shoulder served with our stone-ground cheesy grits GF" + } ], "Third Course": [ - + { + "name": "King Cake", + "desc": "Who will find the baby?! V" + }, + { + "name": "Beignets", + "desc": "A traditional French doughnut V – Make it an affogato! $7" + }, + { + "name": "Pear \u0026 Almond Tart", + "desc": "Almond frangipane pastry with poached pear topping V" + } ] } } @@ -6193,13 +6693,46 @@ "phone": "", "courses": { "First Course": [ - + { + "name": "Roasted Beet Salad", + "desc": "Organic spring greens tossed in a balsamic white truffle vinaigrette, topped with roasted beets, toasted hazelnuts and chèvre GF V – Vegan available by request" + }, + { + "name": "Jalapeño Cheddar Chicken Soup", + "desc": "Chicken stock base, jalapeños, sharp cheddar cheese, shredded chicken, fresh herbs, and a touch of cream. A must try! GF" + }, + { + "name": "Classic Caesar Salad", + "desc": "Crisp romaine hearts tossed with a classic creamy Caesar dressing, house croutons and Parmigiano-Reggiano cheese. Served with lemon GFA" + } ], "Second Course": [ - + { + "name": "Yellow Curry Prawns or Tofu", + "desc": "House-made yellow curry with just the right amount of kick, with your choice of tofu or prawns. 
Served with a coconut rice cake, sweet peas, heirloom carrots, roasted red peppers, and micro pea shoots GF – Vegetarian and vegan option available" + }, + { + "name": "Braised Beef Short Ribs", + "desc": "Boneless beef short ribs roasted for 16 hours with rosemary and thyme, served with a green peppercorn red wine pan sauce and “everything” Yukon potatoes GF" + }, + { + "name": "Hunter’s Chicken", + "desc": "Chicken leg and thigh quarter, cured and braised until falling off the bone served with a rich hunter’s sauce full of vegetables, herbs and chicken stock and “everything” Yukon potatoes GF" + } ], "Third Course": [ - + { + "name": "Mini Margarita Pie", + "desc": "Frozen key lime pie with graham cracker crust. Special ingredient: tequila! Topped with whipped cream and red sea salt V" + }, + { + "name": "Wiley’s Bourbon Creme Brûlée", + "desc": "Delicious brûléed custard with vanilla bean and orange peel topped with bourbon-nutmeg caramel and whipped cream GF V" + }, + { + "name": "Chocolate Pot de Creme", + "desc": "A decadent dessert featuring coconut cream, chocolate and a hint of spice. 
Topped with raspberry puree and candied pecan GF V – Vegan available by request" + } ] } } diff --git a/fix-2025.ps1 b/fix-2025.ps1 deleted file mode 100644 index dbb748f..0000000 --- a/fix-2025.ps1 +++ /dev/null @@ -1,121 +0,0 @@ -# fix-2025.ps1 - Post-process the scraped 2025 restaurant JSON -# Fixes: HTML entities in names/descs, wrong prices, re-fetches 0-course restaurants - -$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition -$jsonPath = Join-Path $projectDir '2025-restaurants.json' - -$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json - -function Decode-Html($str) { - if (-not $str) { return $str } - $s = $str - $s = $s -replace '&', '&' - $s = $s -replace ''', "'" - $s = $s -replace '"', '"' - $s = $s -replace '<', '<' - $s = $s -replace '>', '>' - $s = $s -replace ' ', ' ' - $s = $s -replace '\s+', ' ' - $s.Trim() -} - -# ---- Report issues ---- -Write-Host "=== Data Quality Report ===" -Write-Host "Total restaurants: $($data.Count)" -Write-Host "" - -Write-Host "Wrong prices (not 25/35/45):" -$data | Where-Object { $_.price -notin @(25,35,45) } | ForEach-Object { - Write-Host " $($_.slug): price=$($_.price)" -} - -Write-Host "" -Write-Host "Zero-course restaurants (all 3 empty):" -$data | Where-Object { - $_.menu.courses.'First Course'.Count -eq 0 -and - $_.menu.courses.'Second Course'.Count -eq 0 -and - $_.menu.courses.'Third Course'.Count -eq 0 -} | ForEach-Object { Write-Host " $($_.slug) [price=$($_.price)] name=$($_.name)" } - -Write-Host "" -Write-Host "Partial courses (any course != 3):" -$data | Where-Object { - $_.menu.courses.'First Course'.Count -ne 3 -or - $_.menu.courses.'Second Course'.Count -ne 3 -or - $_.menu.courses.'Third Course'.Count -ne 3 -} | Where-Object { - # Exclude totally empty ones (already reported above) - -not ( - $_.menu.courses.'First Course'.Count -eq 0 -and - $_.menu.courses.'Second Course'.Count -eq 0 -and - $_.menu.courses.'Third Course'.Count -eq 0 - ) -} | ForEach-Object { - $c1 = 
$_.menu.courses.'First Course'.Count - $c2 = $_.menu.courses.'Second Course'.Count - $c3 = $_.menu.courses.'Third Course'.Count - Write-Host " $($_.slug): $c1/$c2/$c3" -} - -Write-Host "" -Write-Host "=== Applying Fixes ===" - -# ---- Fix HTML entities in all string fields ---- -foreach ($r in $data) { - $r.name = Decode-Html $r.name - $r.cuisine = Decode-Html $r.cuisine - - foreach ($course in @('First Course', 'Second Course', 'Third Course')) { - $items = $r.menu.courses.$course - if ($items) { - foreach ($item in $items) { - $item.name = Decode-Html $item.name - $item.desc = Decode-Html $item.desc - } - } - } -} - -# ---- Fix wrong prices using the Wayback Machine price page ---- -# The price page listed restaurants under $25, $35, $45 sections. -# We'll re-fetch pages for wrong-price restaurants using a tighter regex. - -$wrongPrice = $data | Where-Object { $_.price -notin @(25,35,45) } -if ($wrongPrice.Count -gt 0) { - Write-Host "Re-fetching $($wrongPrice.Count) restaurants with wrong prices..." - - foreach ($r in $wrongPrice) { - Write-Host " $($r.slug)..." 
-NoNewline - try { - $url = "https://web.archive.org/web/20250306132630/https://inlanderrestaurantweek.com/project/$($r.slug)/" - $resp = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 60 -ErrorAction Stop - $html = $resp.Content - - # Look specifically for h1 containing a 2-digit price at a tier - $priceM = [regex]::Match($html, ']*>.*?\$(25|35|45)', [System.Text.RegularExpressions.RegexOptions]::Singleline) - if ($priceM.Success) { - $r.price = [int]$priceM.Groups[1].Value - Write-Host " fixed to $($r.price)" - } else { - # Try all strong dollar values and pick first that's 25, 35, or 45 - $allPrices = [regex]::Matches($html, '\$(\d+)') - $validPrice = $allPrices | Where-Object { $_.Groups[1].Value -in @('25','35','45') } | Select-Object -First 1 - if ($validPrice) { - $r.price = [int]$validPrice.Groups[1].Value - Write-Host " fixed to $($r.price)" - } else { - Write-Host " could not determine - left at $($r.price)" - } - } - } catch { - Write-Host " FETCH ERROR: $_" - } - Start-Sleep -Milliseconds 300 - } -} - -# ---- Save fixed JSON ---- -$json = $data | ConvertTo-Json -Depth 10 -[System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8) -Write-Host "" -Write-Host "Saved fixed JSON to $jsonPath" diff --git a/fix-tavolata.ps1 b/fix-tavolata.ps1 new file mode 100644 index 0000000..a0b29d7 --- /dev/null +++ b/fix-tavolata.ps1 @@ -0,0 +1,123 @@ +# fix-tavolata.ps1 +# Run this after the Wayback Machine rate limit resets (wait ~30 minutes after last run) +# Recovers tavolata's Third Course using the same-block parser strategy + +$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition +$jsonPath = Join-Path $projectDir '2025-restaurants.json' +$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json + +function Decode-Html($str) { + if (-not $str) { return $str } + ($str -replace '&','&' -replace ''',"'" -replace '"','"' -replace '<','<' -replace '>','>' -replace ' ',' ' -replace '\s+',' ').Trim() +} +function 
Get-CleanText($rawHtml) { Decode-Html ($rawHtml -replace '<[^>]+>', ' ') } +function Test-DietaryTag($str) { $str -match '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$' } + +function Get-Dish($pContent) { + $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline + $bWithBrM = [regex]::Match($pContent, '(?s)(.*?)', $opts) + if ($bWithBrM.Success) { + $name = Get-CleanText $bWithBrM.Groups[1].Value + if ($name.Length -ge 3 -and $name.Length -le 80 -and -not (Test-DietaryTag $name) -and $name -notmatch '^[A-Z]{1,3}:') { + return [PSCustomObject]@{ name = $name; desc = Get-CleanText ($pContent.Substring($bWithBrM.Index + $bWithBrM.Length)) } + } + } + $bM = [regex]::Match($pContent, '(?s)(.*?)', $opts) + if ($bM.Success) { + $namePart = Get-CleanText $bM.Groups[1].Value + if ($namePart.Length -ge 3 -and -not (Test-DietaryTag $namePart)) { + $afterB = $pContent.Substring($bM.Index + $bM.Length) + $sM2 = [regex]::Match($afterB, '(?s)^[^<]*(.*?)(.*)', $opts) + if ($sM2.Success) { + $p2 = Get-CleanText $sM2.Groups[1].Value + if (-not (Test-DietaryTag $p2) -and $p2.Length -ge 2) { + return [PSCustomObject]@{ name = "$namePart $p2".Trim(); desc = Get-CleanText $sM2.Groups[2].Value } + } + } + return [PSCustomObject]@{ name = $namePart; desc = Get-CleanText $afterB } + } + } + $sM = [regex]::Match($pContent, '(?s)(.*?)', $opts) + if ($sM.Success) { + $name = Get-CleanText $sM.Groups[1].Value + if ($name.Length -lt 3 -or $name.Length -gt 80 -or (Test-DietaryTag $name) -or $name -match '^[A-Z]{1,3}:') { return $null } + $afterBr = '' + if ($pContent -match '(?s)(.*?)$') { $afterBr = $matches[1] } + else { $am = [regex]::Match($pContent, '(?s)(.*?)$', $opts); if ($am.Success) { $afterBr = $am.Groups[1].Value } } + return [PSCustomObject]@{ name = $name; desc = Get-CleanText $afterBr } + } + return $null +} + +function Get-Dishes($courseHtml) { + $dishes = [System.Collections.ArrayList]@() + $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline + foreach ($pm in 
[regex]::Matches($courseHtml, '(?s)]*>(.*?)

', $opts)) { + $pc = $pm.Groups[1].Value + if ($pc -notmatch '|') { continue } + $d = Get-Dish $pc + if ($d -and $d.name) { $null = $dishes.Add($d) } + } + return ,$dishes +} + +function Get-CourseBlock($html, $label, $nextLabel) { + $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline + if ($nextLabel) { + $m = [regex]::Match($html, ([regex]::Escape($label) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')'), $opts) + if ($m.Success) { return $m.Groups[1].Value } + } + $idx = $html.IndexOf($label) + if ($idx -ge 0) { + $sub = $html.Substring($idx, [Math]::Min(8000, $html.Length - $idx)) + $sameDivM = [regex]::Match($sub, '(?s)\s*()', $opts) + if ($sameDivM.Success -and $sameDivM.Groups[1].Value -match '(?!\s*\s*\s*\s* $($first.Count)/$($second.Count)/$($third.Count)" + + if ($third.Count -gt 0) { + if ($first.Count -gt 0) { $r.menu.courses.'First Course' = @($first) } + if ($second.Count -gt 0) { $r.menu.courses.'Second Course' = @($second) } + $r.menu.courses.'Third Course' = @($third) + Write-Host "SUCCESS! tavolata Third Course recovered." -ForegroundColor Green + $success = $true + } else { + Write-Host " Third Course still empty, trying next timestamp..." + } + } catch { + Write-Host " ERROR: $_" -ForegroundColor Red + } + Start-Sleep -Seconds 10 +} + +if (-not $success) { + Write-Host "Could not recover tavolata Third Course. Try again later." -ForegroundColor Yellow +} else { + $json = $data | ConvertTo-Json -Depth 10 + [System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8) + Write-Host "Saved to $jsonPath" +} diff --git a/fix2-2025.ps1 b/fix2-2025.ps1 deleted file mode 100644 index 06381ac..0000000 --- a/fix2-2025.ps1 +++ /dev/null @@ -1,130 +0,0 @@ -# fix2-2025.ps1 - Comprehensive fix for 2025 restaurant JSON -# 1. Fix all prices using authoritative data from price listing page -# 2. Fix HTML entities in all text fields -# 3. 
Report remaining issues - -$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition -$jsonPath = Join-Path $projectDir '2025-restaurants.json' - -# Load JSON -$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json - -# ---- Authoritative price map from price listing page ---- -$authPrices = @{ - "1898"="45"; "24taps"="25"; "315cuisine"="45"; "ambrosia"="45"; - "anthonys"="45"; "arrowhead"="25"; "baba"="45"; "backyardpublichouse"="35"; - "bangkokthai"="35"; "bardenay"="45"; "barkrescuepub"="25"; "beverlys"="45"; - "blackpearl"="25"; "borracho"="35"; "burgerdock"="25"; "cascadia"="25"; - "cedars"="45"; "centennial"="35"; "chaps"="45"; "chinook"="45"; - "chowderhead"="35"; "clinkerdagger"="45"; "cochinito"="25"; "collectivekitchen"="45"; - "dassteinhaus"="35"; "deleons"="25"; "deleonstexmex"="25"; "dockside"="35"; - "downriver"="45"; "dryfly"="35"; "durkins"="45"; "east"="45"; - "emrys"="25"; "feastworldkitchen"="35"; "flameandcork"="35"; "flatstick"="25"; - "flyinggoat"="25"; "fortheloveofgod"="35"; "francaise"="45"; "ganderryegrass"="35"; - "gardenparty"="35"; "gildedunicorn"="45"; "hang10"="25"; "heritage"="35"; - "hogwash"="45"; "honey"="35"; "hulapot"="35"; "indiahouse"="35"; - "indicana"="45"; "inlandpacifickitchen"="45"; "irongoat"="35"; "ironwoodice"="35"; - "karma"="35"; "kasa"="25"; "kismet"="35"; "kunisthai"="35"; - "latahbistro"="45"; "lebanon"="35"; "legendsoffire"="45"; "littledragon"="25"; - "littlenoodle"="25"; "longhornbbq"="25"; "loren"="45"; "lumberbeard"="35"; - "macdaddys"="35"; "mackenzieriver"="25"; "mammamias"="25"; "mangotree"="25"; - "maryhill"="45"; "masselowslounge"="45"; "max"="45"; "meltingpot"="45"; - "mortys"="25"; "northhill"="35"; "odohertys"="35"; "osprey"="35"; - "outsider"="45"; "palmcourtgrill"="45"; "ponderosa"="35"; "purenorthwest"="35"; - "purgatory"="45"; "qqsushi"="35"; "redtail"="35"; "republickitchen"="35"; - "republicpi"="25"; "rut"="35"; "safariroom"="45"; "saranac"="35"; - "satay"="45"; 
"sauced"="25"; "screamingyak"="25"; "seasons"="45"; - "shawnodonnells"="25"; "shelbys"="25"; "skewers"="25"; "southhillgrill"="45"; - "southperrylantern"="45"; "spencers"="45"; "steamplant"="35"; "steelhead"="35"; - "stylus"="35"; "sweetlous"="35"; "swinglounge"="35"; "table13"="45"; - "tavolata"="45"; "terraza"="35"; "thaibamboo"="25"; "thedambar"="45"; - "titos"="35"; "tomatostreet"="35"; "tonysonthelake"="45"; "torratea"="45"; - "truelegends"="25"; "twigs"="35"; "uprise"="25"; "vaqueros"="35"; - "vicinopizza"="25"; "victoryburger"="25"; "vieuxcarre"="35"; "vineolive"="45"; - "wileys"="45" -} - -function Decode-Html($str) { - if (-not $str) { return $str } - $s = $str - $s = $s -replace '&', '&' - $s = $s -replace ''', "'" - $s = $s -replace '"', '"' - $s = $s -replace '<', '<' - $s = $s -replace '>', '>' - $s = $s -replace ' ', ' ' - $s = $s -replace '\s+', ' ' - $s.Trim() -} - -$priceFixed = 0 -$entitiesFixed = 0 - -foreach ($r in $data) { - # Fix price from authoritative map - if ($authPrices.ContainsKey($r.slug)) { - $correctPrice = [int]$authPrices[$r.slug] - if ($r.price -ne $correctPrice) { - Write-Host "Price fix: $($r.slug) $($r.price) -> $correctPrice" - $r.price = $correctPrice - $priceFixed++ - } - } - - # Fix HTML entities - $oldName = $r.name - $r.name = Decode-Html $r.name - $r.cuisine = Decode-Html $r.cuisine - $r.menu.hours = Decode-Html $r.menu.hours - if ($oldName -ne $r.name) { $entitiesFixed++ } - - foreach ($course in @('First Course', 'Second Course', 'Third Course')) { - $items = $r.menu.courses.$course - if ($items) { - foreach ($item in $items) { - $item.name = Decode-Html $item.name - $item.desc = Decode-Html $item.desc - } - } - } -} - -Write-Host "" -Write-Host "Fixed prices: $priceFixed" -Write-Host "Fixed names with entities: $entitiesFixed" -Write-Host "" - -# ---- Report remaining issues ---- -Write-Host "=== Remaining Issues ===" -Write-Host "" - -Write-Host "Zero-course restaurants (all 3 empty):" -$data | Where-Object { - 
$_.menu.courses.'First Course'.Count -eq 0 -and - $_.menu.courses.'Second Course'.Count -eq 0 -and - $_.menu.courses.'Third Course'.Count -eq 0 -} | ForEach-Object { Write-Host " $($_.slug) [$($_.price)] $($_.name)" } - -Write-Host "" -Write-Host "Partial courses (any course count not 3):" -$data | Where-Object { - ($_.menu.courses.'First Course'.Count -ne 3 -or - $_.menu.courses.'Second Course'.Count -ne 3 -or - $_.menu.courses.'Third Course'.Count -ne 3) -and - -not ( - $_.menu.courses.'First Course'.Count -eq 0 -and - $_.menu.courses.'Second Course'.Count -eq 0 -and - $_.menu.courses.'Third Course'.Count -eq 0 - ) -} | ForEach-Object { - $c1 = $_.menu.courses.'First Course'.Count - $c2 = $_.menu.courses.'Second Course'.Count - $c3 = $_.menu.courses.'Third Course'.Count - Write-Host " $($_.slug) [$($_.price)]: $c1/$c2/$c3 - $($_.name)" -} - -# ---- Save ---- -$json = $data | ConvertTo-Json -Depth 10 -[System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8) -Write-Host "" -Write-Host "Saved to $jsonPath" diff --git a/memory/MEMORY.md b/memory/MEMORY.md new file mode 100644 index 0000000..e0a28b6 --- /dev/null +++ b/memory/MEMORY.md @@ -0,0 +1,49 @@ +# Inlander Restaurant Week Picker - Project Memory + +## Quick Reference +- See `scraping-guide.md` for full year-scraping instructions and script templates +- See `html-structures.md` for HTML parsing patterns per restaurant type +- Project dir: `\\WinServ-20-3.chns.local\Profiles\derekc\Documents\Coding Projects\Gitea-CooperandGoodman-Inlander-Restaurant-Week-Picker\Inlander-Restaurant-Week-Picker` + +## Key Constraints (CRITICAL) +- **WebFetch cannot access web.archive.org** — use `curl` via Bash tool instead +- **PowerShell cannot run scripts from UNC paths** (\\server\...) 
— always `cp` scripts to local temp first +- **bash `/tmp`** = `C:\Users\DEREKC~1.CHN\AppData\Local\Temp` (8.3 short name) +- **PowerShell temp** = `C:\Users\derekc.CHNSLocal\AppData\Local\Temp` (long name) — same dir, different string +- **Wayback Machine rate limits** to ~20 requests before throttling with 429; use 3-5 sec delays, wait 30+ min after getting blocked + +## JSON Schema +Each entry in `YEAR-restaurants.json`: +```json +{ + "name": "Restaurant Name", + "slug": "restaurantslug", + "price": 45, + "areas": ["Downtown"], + "cuisine": "American", + "url": "https://inlanderrestaurantweek.com/project/SLUG/", + "menu": { + "hours": "Menu served 5pm-close", + "phone": "(509) 555-1234", + "courses": { + "First Course": [{"name": "Dish Name", "desc": "Description"}], + "Second Course": [...], + "Third Course": [...] + } + } +} +``` +Price is always 25, 35, or 45. gardenparty genuinely has 4 Third Course options. + +## 2025 Data Status +- **File**: `2025-restaurants.json` (121 restaurants) +- **Wayback snapshot used**: `20250306132630` (primary), `20250401000000` (backup for some) +- **Complete (3/3/3+)**: 111 restaurants +- **gardenparty**: 3/3/4 — correct, it genuinely offers 4 dessert choices +- **tavolata**: 3/3/0 — needs fix-tavolata.ps1 run when rate limit resets +- **0/0/0 (JS-only, unrecoverable)**: heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza + +## Scripts in Project Directory +- `fix-tavolata.ps1` — run after rate limit resets to recover tavolata Third Course + - Copy to local temp and run: `cp ...\fix-tavolata.ps1 C:\Users\derekc.CHNSLocal\AppData\Local\Temp\` + - Then: `powershell.exe -ExecutionPolicy Bypass -File C:\Users\derekc.CHNSLocal\AppData\Local\Temp\fix-tavolata.ps1` diff --git a/memory/html-structures.md b/memory/html-structures.md new file mode 100644 index 0000000..6e0dece --- /dev/null +++ b/memory/html-structures.md @@ -0,0 +1,152 @@ +# IRW Website HTML Structure Reference + +## 
Restaurant Page URL +Live: `https://inlanderrestaurantweek.com/project/SLUG/` +Archived: `https://web.archive.org/web/TIMESTAMP/https://inlanderrestaurantweek.com/project/SLUG/` + +## Page Framework +The site uses WordPress + Divi theme. Relevant container class: `et_pb_text_inner`. +Each course section typically occupies one or two `et_pb_text_inner` divs. + +--- + +## Course Layout Types + +### Layout A — Heading and items in SEPARATE divs (most restaurants) +```html +

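<div class="et_pb_text_inner"><h3>First Course</h3></div>
+<div class="et_pb_text_inner">
+  <p><strong>Dish Name</strong><br/>Description</p>
+  <p><strong>Dish Name 2</strong><br/>Description 2</p>
+</div>
+<div class="et_pb_text_inner"><h3>Second Course</h3></div>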
+... +``` + +### Layout B — Heading and items in SAME div (tavolata, durkins, table13, others) +```html +
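<div class="et_pb_text_inner">
+  <h3>First Course</h3>
+  <p><strong>Dish Name</strong><br/>Description</p>
+  <p><strong>Dish Name 2</strong><br/>Description 2</p>
+</div>
+<div class="et_pb_text_inner">
+  <h3>Second Course</h3>
+  ...
+</div>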
+```
+
+---
+
+## Dish Name Tag Styles
+
+### Style 1 — `<strong>` tag (most restaurants)
+Examples: 315cuisine, anthonys, bardenay, barkrescuepub, etc.
+```html
+<p><strong>Dish Name</strong><br/>Description text here</p>
+<p><strong>Dish Name</strong> <br/>With space before br</p>
+```
+
+### Style 2 — `<b>` tag with `<br/>` inside (India House, Lebanon, Karma, ponderosa)
+```html
+<p><b>Dish Name <br/></b><span>Description text</span></p>
+<p><b>Dish Name<br/></b>Description without span</p>
+```
+Key: name is inside `<b>`, the `<br/>` is INSIDE the `<b>` tag.
+
+### Style 3 — `<b>` + `<strong>` combo (1898 restaurant)
+```html
+<p><span><b>First Part</b></span><strong>Second Part</strong> Description</p>
+``` +Full dish name = "First Part" + " " + "Second Part" + +--- + +## Field Extraction Patterns + +### Name (from page title) +``` +Restaurant Name | Inlander Restaurant Week +``` +Regex: `(.+?) \| Inlander` + +### Price (WARNING: unreliable — use price listing page instead) +```html +<h1 style="text-align: left;"><strong>$45</strong></h1> +``` +Regex: `<strong>\$(\d+)</strong>` +PROBLEM: Some pages show drink prices like $22 that match before the real price. +SOLUTION: Always build an authoritative slug→price map from the price listing page. + +### Price Listing Page — Authoritative Prices +URL: `https://inlanderrestaurantweek.com/price/` (or Wayback archived version) +```html +<article class="et_pb_portfolio_item ... project_category_45 ..."> + ... + <a href="https://inlanderrestaurantweek.com/project/SLUG/"> +``` +Extract price tier from `project_category_(25|35|45)` CSS class. +Extract slug from `href=".../project/SLUG/"`. + +### Cuisine +```html +CUISINE: AMERICAN COMFORT FOOD +``` +Often inside `<strong>` or `<em>` tags. Extract uppercase text after "CUISINE:". +Apply `.ToTitleCase()` for proper formatting. + +### Phone +Area codes: 509 (Spokane area) or 208 (Idaho/CDA area) +Pattern: `(509) 555-1234` or `(208) 555-1234` +Regex: `\((?:208|509)\) \d{3}-\d{4}` + +### Hours +``` +Menu served 5pm-9pm nightly +Menu served Thursday-Sunday, 5-9pm +``` +Regex: `Menu served [^<]+` + +### Area +Look for area keywords (ALL CAPS in source) anywhere in the HTML: +- DOWNTOWN, NORTH SPOKANE, SOUTH SPOKANE, WEST SPOKANE, SPOKANE VALLEY +- AIRWAY HEIGHTS, LIBERTY LAKE, COEUR D'ALENE, POST FALLS, HAYDEN, ATHOL, WORLEY +Default to ["Downtown"] if nothing matched. +Some restaurants appear in multiple areas — collect all matches. 
+ +--- + +## Dietary Tag Filtering +Skip these as dish names — they appear in `<strong>` but are dietary labels, not dish names: +- GF (gluten free) +- GFA (gluten free available) +- V, V+ (vegetarian, vegan) +- DF, DFA (dairy free, dairy free available) +- V:, V+A (legend lines) +- 2025 (year marker some restaurants include) +- Drink (some restaurants label beverage course) + +Full regex: `^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$` +Also skip names matching `^[A-Z]{1,3}:` (legend lines like "GF: Gluten Free") +Also skip names shorter than 3 chars or longer than 80 chars. + +--- + +## Restaurants by Known HTML Style (2025) + +**Layout B (same-block)**: tavolata, durkins, table13, terraza, and others +**Style 2 (`<b>` tags)**: indiahouse, lebanon, karma, ponderosa, collectivekitchen, dryfly, masselowslounge, vieuxcarre, wileys, osprey, shawnodonnells, ganderryegrass +**Style 3 (`<b>`+`<strong>` combo)**: 1898 + +Note: These styles may change year to year as restaurants update their pages. +Always check a few representative pages before assuming the same structure applies. + +--- + +## JS-Only Pages (no static HTML menu content) +These restaurants had no recoverable menu data from any Wayback snapshot in 2025: +heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza + +Their pages are fully JS-rendered — the static HTML captured by Wayback Machine +shows the page shell but not the menu content. For future years, these may or may not +have static caches depending on server-side rendering changes. diff --git a/memory/scraping-guide.md b/memory/scraping-guide.md new file mode 100644 index 0000000..924eac2 --- /dev/null +++ b/memory/scraping-guide.md @@ -0,0 +1,237 @@ +# IRW Scraping Guide — Full Process for Adding a New Year + +## Overview +The Inlander Restaurant Week website (inlanderrestaurantweek.com) is WordPress/Divi. 
+Menu pages are partially JS-rendered but WP-Super-Cache creates static HTML snapshots +that the Wayback Machine archives. We scrape those static snapshots. + +--- + +## Step 1: Find Restaurant Slugs + +Fetch the price listing page to get all slugs for that year: +```bash +curl -s "https://web.archive.org/web/TIMESTAMP/https://inlanderrestaurantweek.com/price/" \ + -o /tmp/irw-price-YEAR.html +``` + +Pick a timestamp close to the event (Wayback Machine format: YYYYMMDDHHmmss). +The price listing page has portfolio items like: +```html +<article class="et_pb_portfolio_item ... project_category_45"> + <a href="https://inlanderrestaurantweek.com/project/SLUG/"> +``` +Extract slug from the href. The class `project_category_(25|35|45)` gives authoritative price. + +**Important**: Scrape the price listing page FIRST and save the slug→price map. +Some restaurant pages have drink prices ($22, $33) that confuse the price parser. + +--- + +## Step 2: Scrape Each Restaurant Page + +Use a PowerShell script (written to project dir, copied to local temp to run): + +**Wayback Machine URL format**: +``` +https://web.archive.org/web/TIMESTAMP/https://inlanderrestaurantweek.com/project/SLUG/ +``` + +**Key fields to extract**: +```powershell +# Name +$nameM = [regex]::Match($html, '<title>(.+?) 
\| Inlander') + +# Price (from page, but USE PRICE LISTING MAP - this can be wrong) +$priceM = [regex]::Match($html, '<strong>\$(\d+)</strong>') + +# Cuisine +$cuisineM = [regex]::Match($html, 'CUISINE:\s*([A-Z][A-Za-z/ ]+?)(?:\s*</|\s*<)') +$cuisine = (Get-Culture).TextInfo.ToTitleCase($c.ToLower()) + +# Phone +$phoneM = [regex]::Match($html, '\((?:208|509)\) \d{3}-\d{4}') + +# Hours +$hoursM = [regex]::Match($html, 'Menu served [^<]+') + +# Area (match against known area keys, case-insensitive) +$areaMap keys: "AIRWAY HEIGHTS","ATHOL","COEUR D'ALENE","POST FALLS","HAYDEN", + "LIBERTY LAKE","NORTH SPOKANE","SOUTH SPOKANE","SPOKANE VALLEY", + "WEST SPOKANE","WORLEY","DOWNTOWN" +``` + +**Rate limiting**: Add `Start-Sleep -Milliseconds 2000` between each request. +After a 429, stop and wait 30+ minutes before trying again. + +--- + +## Step 3: Parse Menu Courses + +### Course Block Extraction (`Get-CourseBlock`) +Two HTML layouts exist: + +**Layout A** (most common): heading and items in SEPARATE `et_pb_text_inner` blocks +```powershell +# Strategy 1: find content between this label and next label +$m = [regex]::Match($html, [regex]::Escape($label) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')', $opts) + +# Strategy 3 (fallback): items in next et_pb_text_inner block +$im = [regex]::Match($sub, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $opts) +``` + +**Layout B** (some restaurants — tavolata, durkins, table13, etc.): heading + items in SAME block +```powershell +# Strategy 2: extract <p> tags after </h3> within same div +$sameDivM = [regex]::Match($sub, '(?s)</h[123]>\s*(<p.+?)(?=</div>)', $opts) +``` + +### Dish Parsing (`Parse-Dish`) +Three tag styles exist: + +**Style 1** (most restaurants): `<strong>` for name +```html +<p><strong>Dish Name</strong><br/>Description text</p> +``` + +**Style 2** (India House, Lebanon, Karma, others): `<b>` with `<br/>` before `</b>` +```html +<p><b>Dish Name 
<br/></b><span>Description text</span></p> +``` + +**Style 3** (1898): `<b>` + `<strong>` combination +```html +<p><span><b>Part1</b></span><strong>Part2</strong> Description</p> +``` + +**Multi-strategy parser** (handles all three): +```powershell +function Parse-Dish($pContent) { + $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline + + # Style 2: <b>Name <br/></b> + $bWithBrM = [regex]::Match($pContent, '(?s)<b>(.*?)<br\s*/?>', $opts) + if ($bWithBrM.Success) { + $name = Get-CleanText $bWithBrM.Groups[1].Value + if (Test-ValidDishName $name) { + $desc = Get-CleanText ($pContent.Substring($bWithBrM.Index + $bWithBrM.Length)) + return [PSCustomObject]@{ name = $name; desc = $desc } + } + } + + # Style 3: <b>Part1</b>...<strong>Part2</strong> + $bM = [regex]::Match($pContent, '(?s)<b>(.*?)</b>', $opts) + if ($bM.Success) { + $namePart = Get-CleanText $bM.Groups[1].Value + if (Test-ValidDishName $namePart) { + $afterB = $pContent.Substring($bM.Index + $bM.Length) + $sM2 = [regex]::Match($afterB, '(?s)^[^<]*<strong>(.*?)</strong>(.*)', $opts) + if ($sM2.Success) { + $p2 = Get-CleanText $sM2.Groups[1].Value + if (-not (Test-DietaryTag $p2) -and $p2.Length -ge 2) { + return [PSCustomObject]@{ name = "$namePart $p2".Trim(); desc = Get-CleanText $sM2.Groups[2].Value } + } + } + return [PSCustomObject]@{ name = $namePart; desc = Get-CleanText $afterB } + } + } + + # Style 1: <strong>Name</strong> + $sM = [regex]::Match($pContent, '(?s)<strong>(.*?)</strong>', $opts) + if ($sM.Success) { + $name = Get-CleanText $sM.Groups[1].Value + if (-not (Test-ValidDishName $name)) { return $null } + $afterBr = '' + if ($pContent -match '(?s)<br\s*/?>(.*?)$') { $afterBr = $matches[1] } + else { $am = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $opts); if ($am.Success) { $afterBr = $am.Groups[1].Value } } + return [PSCustomObject]@{ name = $name; desc = Get-CleanText $afterBr } + } + return $null +} + +function Test-ValidDishName($name) { + $name.Length -ge 3 -and 
$name.Length -le 80 -and + $name -notmatch '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$' -and + $name -notmatch '^[A-Z]{1,3}:' +} + +function Test-DietaryTag($str) { + $str -match '^(GF|GFA|V\+?|DF|DFA|V:|2025|Drink|V\+A)$' +} +``` + +### HTML Cleanup +```powershell +function Get-CleanText($rawHtml) { + $t = $rawHtml -replace '<[^>]+>', ' ' + $t = $t -replace '&', '&' -replace ''', "'" -replace '"', '"' + $t = $t -replace '<', '<' -replace '>', '>' -replace ' ', ' ' + $t = $t -replace '–', '-' -replace '—', '-' + ($t -replace '\s+', ' ').Trim() +} +``` + +--- + +## Step 4: Fix Prices + +After scraping, apply authoritative prices from the price listing page: +- Parse `project_category_(25|35|45)` CSS class from portfolio items +- Match slug from adjacent `href` attribute +- Build a hashtable and apply to all entries + +Common gotcha: Restaurant pages may show $22 (wine), $33 (lunch) — these are NOT the event price. + +--- + +## Step 5: Recover Missing Restaurants + +If a restaurant has 0/0/0 courses: +1. Try alternate Wayback timestamps: `20250401000000`, `20250415000000`, `20250501000000`, `20250601000000` +2. Check if page uses Layout B (same-block) — add Strategy 2 to course block extractor +3. 
Check if page uses `<b>` tags instead of `<strong>` for dish names + +**Known JS-only restaurants** (no static cache recoverable for 2025): +heritage, kismet, littlenoodle, macdaddys, purgatory, redtail, republickitchen, republicpi, vicinopizza + +--- + +## Step 6: Output and Validation + +```powershell +# Save as UTF-8 (important — special characters in restaurant names) +$json = $data | ConvertTo-Json -Depth 10 +[System.IO.File]::WriteAllText($outPath, $json, [System.Text.Encoding]::UTF8) + +# Validate: list any restaurant not at 3/3/3 +$data | Where-Object { + $_.menu.courses.'First Course'.Count -ne 3 -or + $_.menu.courses.'Second Course'.Count -ne 3 -or + $_.menu.courses.'Third Course'.Count -ne 3 +} | ForEach-Object { + "$($_.slug): $($_.menu.courses.'First Course'.Count)/$($_.menu.courses.'Second Course'.Count)/$($_.menu.courses.'Third Course'.Count)" +} +``` + +--- + +## PowerShell Script Execution Pattern (REQUIRED) + +```bash +# Write script to project dir (via Write tool or Edit) +# Then in bash: +cp "//WinServ-20-3.chns.local/Profiles/derekc/Documents/Coding Projects/.../script.ps1" \ + "/c/Users/derekc.CHNSLocal/AppData/Local/Temp/script.ps1" +powershell.exe -ExecutionPolicy Bypass -File "C:\Users\derekc.CHNSLocal\AppData\Local\Temp\script.ps1" +``` + +**Never** use `powershell -Command "..."` for multi-line scripts — escaping is unreliable. +**Never** try to run `.ps1` directly from `\\WinServ-20-3...` UNC path — execution policy blocks it. 
+ +--- + +## PowerShell Gotchas +- `"$slug: text"` fails if `:` follows var — use `"${slug}: text"` +- Function names like `Is-X`, `Decode-X`, `Parse-X` get PSScriptAnalyzer warnings (unapproved verbs) but work fine +- `return ,$array` (comma prefix) forces PowerShell to return an array, not unroll it +- `[System.IO.File]::WriteAllText(path, json, UTF8)` — use this, not `Out-File`, to avoid BOM/encoding issues diff --git a/rescrape-missing.ps1 b/rescrape-missing.ps1 deleted file mode 100644 index af521c4..0000000 --- a/rescrape-missing.ps1 +++ /dev/null @@ -1,179 +0,0 @@ -# rescrape-missing.ps1 - Re-fetches 0-course and partial restaurants -# using CDX API to find best available Wayback Machine snapshot - -$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition -$jsonPath = Join-Path $projectDir '2025-restaurants.json' - -$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json - -function Decode-Html($str) { - if (-not $str) { return $str } - $s = $str -replace '&','&' -replace ''',"'" -replace '"','"' -replace '<','<' -replace '>','>' -replace ' ',' ' -replace '\s+',' ' - $s.Trim() -} - -function Get-CleanText($rawHtml) { - $t = $rawHtml -replace '<[^>]+>', ' ' - $t = Decode-Html $t - $t.Trim() -} - -function Invoke-Dishes($courseHtml) { - $dishes = [System.Collections.ArrayList]@() - $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline - - $pMatches = [regex]::Matches($courseHtml, '<p[^>]*>(.*?)</p>', $opts) - foreach ($pm in $pMatches) { - $pContent = $pm.Groups[1].Value - if ($pContent -notmatch '<strong>') { continue } - - $nameM = [regex]::Match($pContent, '<strong>(.*?)</strong>', $opts) - if (-not $nameM.Success) { continue } - $name = Get-CleanText $nameM.Groups[1].Value - - if ($name -match '^(GF|GFA|V\+?|DF|V:|2025|Drink)') { continue } - if ($name.Length -lt 3 -or $name.Length -gt 80) { continue } - if ($name -match '^[A-Z]{1,3}:') { continue } - - $afterBr = '' - if ($pContent -match '(?s)<br\s*/?>(.*?)$') { 
$afterBr = $matches[1] } - else { - $afterStrong = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $opts) - if ($afterStrong.Success) { $afterBr = $afterStrong.Groups[1].Value } - } - $desc = Get-CleanText $afterBr - $null = $dishes.Add([PSCustomObject]@{ name = $name; desc = $desc }) - } - return ,$dishes -} - -function Invoke-CourseBlock($html, $courseLabel, $nextLabel) { - $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline - if ($nextLabel) { - $pattern = [regex]::Escape($courseLabel) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')' - $m = [regex]::Match($html, $pattern, $opts) - if ($m.Success) { return $m.Groups[1].Value } - } - $idx = $html.IndexOf($courseLabel) - if ($idx -ge 0) { - $sub = $html.Substring($idx, [Math]::Min(6000, $html.Length - $idx)) - $innerM = [regex]::Match($sub, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $opts) - if ($innerM.Success) { return $innerM.Groups[1].Value } - } - return '' -} - -function Parse-RestaurantHtml($html) { - $fc = Invoke-CourseBlock $html 'First Course' 'Second Course' - $sc = Invoke-CourseBlock $html 'Second Course' 'Third Course' - $tc = Invoke-CourseBlock $html 'Third Course' $null - return @{ - first = Invoke-Dishes $fc - second = Invoke-Dishes $sc - third = Invoke-Dishes $tc - hours = if ($html -match 'Menu served ([^<]+)') { "Menu served $($matches[1].Trim())" } else { '' } - phone = if ($html -match '\((?:208|509)\) \d{3}-\d{4}') { $matches[0] } else { '' } - } -} - -# Find which restaurants need re-scraping -$needsRescrape = $data | Where-Object { - $c1 = $_.menu.courses.'First Course'.Count - $c2 = $_.menu.courses.'Second Course'.Count - $c3 = $_.menu.courses.'Third Course'.Count - ($c1 -eq 0 -and $c2 -eq 0 -and $c3 -eq 0) -or - ($c1 -ne 3 -or $c2 -ne 3 -or $c3 -ne 3) -} - -Write-Host "Restaurants to re-scrape: $($needsRescrape.Count)" -Write-Host "" - -foreach ($r in $needsRescrape) { - $slug = $r.slug - Write-Host "[$slug] 
Looking up CDX snapshots..." -NoNewline - - try { - # CDX API: find snapshots from Jan-May 2025 - $cdxUrl = "https://web.archive.org/cdx/search/cdx?url=inlanderrestaurantweek.com/project/$slug/&output=text&limit=10&from=20250101&to=20250501&filter=statuscode:200&fl=timestamp" - $cdxResp = Invoke-WebRequest -Uri $cdxUrl -UseBasicParsing -TimeoutSec 30 -ErrorAction Stop - $timestamps = $cdxResp.Content -split "`n" | Where-Object { $_ -match '^\d{14}$' } - - if ($timestamps.Count -eq 0) { - Write-Host " No CDX snapshots found" - continue - } - - Write-Host " Found $($timestamps.Count) snapshots" - - $best = $null - foreach ($ts in $timestamps) { - Write-Host " Trying $ts..." -NoNewline - try { - $pageUrl = "https://web.archive.org/web/$ts/https://inlanderrestaurantweek.com/project/$slug/" - $resp = Invoke-WebRequest -Uri $pageUrl -UseBasicParsing -TimeoutSec 45 -ErrorAction Stop - $html = $resp.Content - - $parsed = Parse-RestaurantHtml $html - $c1 = $parsed.first.Count - $c2 = $parsed.second.Count - $c3 = $parsed.third.Count - Write-Host " $c1/$c2/$c3" - - # Better than what we have? 
- $curr1 = $r.menu.courses.'First Course'.Count - $curr2 = $r.menu.courses.'Second Course'.Count - $curr3 = $r.menu.courses.'Third Course'.Count - $currTotal = $curr1 + $curr2 + $curr3 - $newTotal = $c1 + $c2 + $c3 - - if ($newTotal -gt $currTotal -or ($c1 -ge 3 -and $c2 -ge 3 -and $c3 -ge 3)) { - $best = $parsed - if ($c1 -ge 3 -and $c2 -ge 3 -and $c3 -ge 3) { break } - } - } catch { - Write-Host " FETCH ERROR" - } - Start-Sleep -Milliseconds 400 - } - - if ($best) { - if ($best.first.Count -gt $r.menu.courses.'First Course'.Count -or - $best.second.Count -gt $r.menu.courses.'Second Course'.Count -or - $best.third.Count -gt $r.menu.courses.'Third Course'.Count) { - Write-Host " -> Updating with $($best.first.Count)/$($best.second.Count)/$($best.third.Count) courses" - $r.menu.courses.'First Course' = @($best.first) - $r.menu.courses.'Second Course' = @($best.second) - $r.menu.courses.'Third Course' = @($best.third) - if ($best.hours -and -not $r.menu.hours) { $r.menu.hours = $best.hours } - if ($best.phone -and -not $r.menu.phone) { $r.menu.phone = $best.phone } - } - } else { - Write-Host " -> No improvement found" - } - - } catch { - Write-Host " CDX ERROR: $_" - } - Start-Sleep -Milliseconds 500 -} - -# ---- Final report ---- -Write-Host "" -Write-Host "=== Final Status ===" -$data | Where-Object { - $c1 = $_.menu.courses.'First Course'.Count - $c2 = $_.menu.courses.'Second Course'.Count - $c3 = $_.menu.courses.'Third Course'.Count - ($c1 -eq 0 -and $c2 -eq 0 -and $c3 -eq 0) -or - ($c1 -ne 3 -or $c2 -ne 3 -or $c3 -ne 3) -} | ForEach-Object { - $c1 = $_.menu.courses.'First Course'.Count - $c2 = $_.menu.courses.'Second Course'.Count - $c3 = $_.menu.courses.'Third Course'.Count - Write-Host " $($_.slug): $c1/$c2/$c3" -} - -$json = $data | ConvertTo-Json -Depth 10 -[System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8) -Write-Host "" -Write-Host "Saved to $jsonPath" diff --git a/rescrape2-missing.ps1 b/rescrape2-missing.ps1 deleted file mode 
100644 index 177b39f..0000000 --- a/rescrape2-missing.ps1 +++ /dev/null @@ -1,166 +0,0 @@ -# rescrape2-missing.ps1 - Re-fetches problematic restaurants with multiple timestamps -# Uses fixed timestamps (no CDX API) with generous delays to avoid rate limiting - -$projectDir = Split-Path -Parent $MyInvocation.MyCommand.Definition -$jsonPath = Join-Path $projectDir '2025-restaurants.json' - -$data = Get-Content $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json - -function Decode-Html($str) { - if (-not $str) { return $str } - ($str -replace '&','&' -replace ''',"'" -replace '"','"' -replace '<','<' -replace '>','>' -replace ' ',' ' -replace '\s+',' ').Trim() -} - -function Get-CleanText($rawHtml) { - Decode-Html ($rawHtml -replace '<[^>]+>', ' ') -} - -function Invoke-Dishes($courseHtml) { - $dishes = [System.Collections.ArrayList]@() - $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline - $pMatches = [regex]::Matches($courseHtml, '<p[^>]*>(.*?)</p>', $opts) - foreach ($pm in $pMatches) { - $pContent = $pm.Groups[1].Value - if ($pContent -notmatch '<strong>') { continue } - $nameM = [regex]::Match($pContent, '<strong>(.*?)</strong>', $opts) - if (-not $nameM.Success) { continue } - $name = Get-CleanText $nameM.Groups[1].Value - if ($name -match '^(GF|GFA|V\+?|DF|V:|2025|Drink)') { continue } - if ($name.Length -lt 3 -or $name.Length -gt 80) { continue } - if ($name -match '^[A-Z]{1,3}:') { continue } - $afterBr = '' - if ($pContent -match '(?s)<br\s*/?>(.*?)$') { $afterBr = $matches[1] } - else { - $am = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $opts) - if ($am.Success) { $afterBr = $am.Groups[1].Value } - } - $desc = Get-CleanText $afterBr - $null = $dishes.Add([PSCustomObject]@{ name = $name; desc = $desc }) - } - return ,$dishes -} - -function Invoke-CourseBlock($html, $courseLabel, $nextLabel) { - $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline - if ($nextLabel) { - $m = [regex]::Match($html, ([regex]::Escape($courseLabel) + 
'(.+?)(?=' + [regex]::Escape($nextLabel) + ')'), $opts) - if ($m.Success) { return $m.Groups[1].Value } - } - $idx = $html.IndexOf($courseLabel) - if ($idx -ge 0) { - $sub = $html.Substring($idx, [Math]::Min(6000, $html.Length - $idx)) - $im = [regex]::Match($sub, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $opts) - if ($im.Success) { return $im.Groups[1].Value } - } - return '' -} - -function Fetch-And-Parse($url) { - $resp = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 45 -ErrorAction Stop - $html = $resp.Content - # Check if it's a 429 page - if ($html -match '429 Too Many Requests') { throw "Rate limited" } - $fc = Invoke-CourseBlock $html 'First Course' 'Second Course' - $sc = Invoke-CourseBlock $html 'Second Course' 'Third Course' - $tc = Invoke-CourseBlock $html 'Third Course' $null - return @{ - first = Invoke-Dishes $fc - second = Invoke-Dishes $sc - third = Invoke-Dishes $tc - hours = if ($html -match 'Menu served ([^<]+)') { "Menu served $($matches[1].Trim())" } else { '' } - phone = if ($html -match '\((?:208|509)\) \d{3}-\d{4}') { $matches[0] } else { '' } - total = 0 - } -} - -# Timestamps to try for each restaurant (spanning Feb-May 2025) -$timestamps = @( - '20250301000000', - '20250308000000', - '20250315000000', - '20250401000000', - '20250415000000', - '20250501000000' -) - -# Find problematic restaurants -$problems = $data | Where-Object { - $c1 = $_.menu.courses.'First Course'.Count - $c2 = $_.menu.courses.'Second Course'.Count - $c3 = $_.menu.courses.'Third Course'.Count - ($c1 -eq 0 -and $c2 -eq 0 -and $c3 -eq 0) -or - ($c1 -ne 3 -or $c2 -ne 3 -or $c3 -ne 3) -} - -Write-Host "Restaurants to retry: $($problems.Count)" -Write-Host "Starting with 3-second delay between requests..." 
-Write-Host "" - -$i = 0 -foreach ($r in $problems) { - $i++ - $slug = $r.slug - $curr1 = $r.menu.courses.'First Course'.Count - $curr2 = $r.menu.courses.'Second Course'.Count - $curr3 = $r.menu.courses.'Third Course'.Count - Write-Host "[$i/$($problems.Count)] $slug (currently $curr1/$curr2/$curr3)" - - $bestResult = $null - $bestTotal = $curr1 + $curr2 + $curr3 - - foreach ($ts in $timestamps) { - $url = "https://web.archive.org/web/$ts/https://inlanderrestaurantweek.com/project/$slug/" - Write-Host " Trying $ts..." -NoNewline - try { - $result = Fetch-And-Parse $url - $t = $result.first.Count + $result.second.Count + $result.third.Count - Write-Host " $($result.first.Count)/$($result.second.Count)/$($result.third.Count)" - if ($t -gt $bestTotal) { - $bestTotal = $t - $bestResult = $result - if ($result.first.Count -ge 3 -and $result.second.Count -ge 3 -and $result.third.Count -ge 3) { - break # Perfect - no need to try more timestamps - } - } - } catch { - Write-Host " FAIL: $_" - } - Start-Sleep -Milliseconds 3000 - } - - if ($bestResult -and $bestTotal -gt ($curr1 + $curr2 + $curr3)) { - Write-Host " -> Updating: $($bestResult.first.Count)/$($bestResult.second.Count)/$($bestResult.third.Count)" - $r.menu.courses.'First Course' = @($bestResult.first) - $r.menu.courses.'Second Course' = @($bestResult.second) - $r.menu.courses.'Third Course' = @($bestResult.third) - if ($bestResult.hours -and -not $r.menu.hours) { $r.menu.hours = $bestResult.hours } - if ($bestResult.phone -and -not $r.menu.phone) { $r.menu.phone = $bestResult.phone } - } else { - Write-Host " -> No improvement" - } - Start-Sleep -Milliseconds 2000 -} - -Write-Host "" -Write-Host "=== Final Status ===" -$remaining = $data | Where-Object { - $c1 = $_.menu.courses.'First Course'.Count - $c2 = $_.menu.courses.'Second Course'.Count - $c3 = $_.menu.courses.'Third Course'.Count - ($c1 -eq 0 -and $c2 -eq 0 -and $c3 -eq 0) -or ($c1 -ne 3 -or $c2 -ne 3 -or $c3 -ne 3) -} -Write-Host "Still incomplete: 
$($remaining.Count)" -foreach ($r in $remaining) { - $c1 = $_.menu.courses.'First Course'.Count - $c2 = $_.menu.courses.'Second Course'.Count - $c3 = $_.menu.courses.'Third Course'.Count - $c1 = $r.menu.courses.'First Course'.Count - $c2 = $r.menu.courses.'Second Course'.Count - $c3 = $r.menu.courses.'Third Course'.Count - Write-Host " $($r.slug): $c1/$c2/$c3" -} - -$json = $data | ConvertTo-Json -Depth 10 -[System.IO.File]::WriteAllText($jsonPath, $json, [System.Text.Encoding]::UTF8) -Write-Host "" -Write-Host "Saved to $jsonPath" diff --git a/scrape-2025.ps1 b/scrape-2025.ps1 deleted file mode 100644 index 441a992..0000000 --- a/scrape-2025.ps1 +++ /dev/null @@ -1,227 +0,0 @@ -# scrape-2025.ps1 - Scrapes 2025 Inlander Restaurant Week menus from Wayback Machine -# Run from local path (UNC paths block PS execution) - -$slugs = @( - "1898", "24taps", "315cuisine", "ambrosia", "anthonys", "arrowhead", "baba", - "backyardpublichouse", "bangkokthai", "bardenay", "barkrescuepub", "beverlys", - "blackpearl", "borracho", "burgerdock", "cascadia", "cedars", "centennial", - "chaps", "chinook", "chowderhead", "clinkerdagger", "cochinito", "collectivekitchen", - "dassteinhaus", "deleons", "deleonstexmex", "dockside", "downriver", "dryfly", - "durkins", "east", "emrys", "feastworldkitchen", "flameandcork", "flatstick", - "flyinggoat", "fortheloveofgod", "francaise", "ganderryegrass", "gardenparty", - "gildedunicorn", "hang10", "heritage", "hogwash", "honey", "hulapot", - "indiahouse", "indicana", "inlandpacifickitchen", "irongoat", "ironwoodice", - "karma", "kasa", "kismet", "kunisthai", "latahbistro", "lebanon", "legendsoffire", - "littledragon", "littlenoodle", "longhornbbq", "loren", "lumberbeard", - "macdaddys", "mackenzieriver", "mammamias", "mangotree", "maryhill", - "masselowslounge", "max", "meltingpot", "mortys", "northhill", "odohertys", - "osprey", "outsider", "palmcourtgrill", "ponderosa", "purenorthwest", - "purgatory", "qqsushi", "redtail", "republickitchen", 
"republicpi", "rut", - "safariroom", "saranac", "satay", "sauced", "screamingyak", "seasons", - "shawnodonnells", "shelbys", "skewers", "southhillgrill", "southperrylantern", - "spencers", "steamplant", "steelhead", "stylus", "sweetlous", "swinglounge", - "table13", "tavolata", "terraza", "thaibamboo", "thedambar", "titos", - "tomatostreet", "tonysonthelake", "torratea", "truelegends", "twigs", - "uprise", "vaqueros", "vicinopizza", "victoryburger", "vieuxcarre", - "vineolive", "wileys" -) - -$areaMap = [ordered]@{ - "AIRWAY HEIGHTS" = "Airway Heights" - "ATHOL" = "Athol" - "COEUR D'ALENE" = "Coeur d'Alene" - "POST FALLS" = "Post Falls" - "HAYDEN" = "Hayden" - "LIBERTY LAKE" = "Liberty Lake" - "NORTH SPOKANE" = "North Spokane" - "SOUTH SPOKANE" = "South Spokane" - "SPOKANE VALLEY" = "Spokane Valley" - "WEST SPOKANE" = "West Spokane" - "WORLEY" = "Worley" - "DOWNTOWN" = "Downtown" -} - -function Get-CleanText($rawHtml) { - $t = $rawHtml -replace '<[^>]+>', ' ' - $t = $t -replace '&', '&' - $t = $t -replace '<', '<' - $t = $t -replace '>', '>' - $t = $t -replace '"', '"' - $t = $t -replace ''', "'" - $t = $t -replace ' ', ' ' - $t = $t -replace '–', '-' - $t = $t -replace '—', '-' - $t = $t -replace '\s+', ' ' - $t.Trim() -} - -function Extract-Dishes($courseHtml) { - $dishes = [System.Collections.ArrayList]@() - $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline - - $pMatches = [regex]::Matches($courseHtml, '<p[^>]*>(.*?)</p>', $opts) - - foreach ($pm in $pMatches) { - $pContent = $pm.Groups[1].Value - if ($pContent -notmatch '<strong>') { continue } - - # First <strong> = dish name - $nameM = [regex]::Match($pContent, '<strong>(.*?)</strong>', $opts) - if (-not $nameM.Success) { continue } - $name = Get-CleanText $nameM.Groups[1].Value - - # Skip dietary-only names and very short strings - if ($name -match '^(GF|GFA|V\+?|DF|V:|2025)$') { continue } - if ($name.Length -lt 3) { continue } - if ($name -match '^[A-Z]{1,3}:') { continue } # skip legend 
lines like "GF:" - if ($name.Length -gt 80) { continue } - - # Description: everything after first <br/> - $afterBr = '' - if ($pContent -match '(?s)<br\s*/?>(.*?)$') { - $afterBr = $matches[1] - } else { - $afterStrong = [regex]::Match($pContent, '(?s)</strong>(.*?)$', $opts) - if ($afterStrong.Success) { $afterBr = $afterStrong.Groups[1].Value } - } - - $desc = Get-CleanText $afterBr - $null = $dishes.Add([PSCustomObject]@{ name = $name; desc = $desc }) - } - - return ,$dishes -} - -function Extract-CourseBlock($html, $courseLabel, $nextLabel) { - $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline - - # Strategy 1: find content in et_pb_text_inner after course label, before next label - if ($nextLabel) { - $pattern = [regex]::Escape($courseLabel) + '(.+?)(?=' + [regex]::Escape($nextLabel) + ')' - $m = [regex]::Match($html, $pattern, $opts) - if ($m.Success) { return $m.Groups[1].Value } - } - - # Strategy 2: find the et_pb_text_inner block immediately following the course label - $idx = $html.IndexOf($courseLabel) - if ($idx -ge 0) { - $sub = $html.Substring($idx, [Math]::Min(6000, $html.Length - $idx)) - # Skip past the heading block and find the next text_inner content - $innerM = [regex]::Match($sub, '(?s)et_pb_text_inner">(?!<h[123])(.+?)(?=et_pb_text_inner"><h|</div>\s*</div>\s*</div>\s*</div>\s*<div)', $opts) - if ($innerM.Success) { return $innerM.Groups[1].Value } - } - return '' -} - -$restaurants = [System.Collections.ArrayList]@() -$total = $slugs.Count -$i = 0 - -foreach ($slug in $slugs) { - $i++ - Write-Host "[$i/$total] Fetching: $slug" -NoNewline - - $url = "https://web.archive.org/web/20250306132630/https://inlanderrestaurantweek.com/project/$slug/" - - try { - $response = Invoke-WebRequest -Uri $url -UseBasicParsing -TimeoutSec 60 -ErrorAction Stop - # Read content as bytes then decode as UTF-8 to preserve special chars - $bytes = $response.RawContentStream.ToArray() - $html = [System.Text.Encoding]::UTF8.GetString($bytes) - - # 
--- Name --- - $nameM = [regex]::Match($html, '<title>(.+?) \| Inlander') - $name = if ($nameM.Success) { $nameM.Groups[1].Value.Trim() } else { $slug } - - # --- Price (from <strong>$45</strong> in an h1) --- - $priceM = [regex]::Match($html, '<strong>\$(\d+)</strong>') - $price = if ($priceM.Success) { [int]$priceM.Groups[1].Value } else { 0 } - - # --- Cuisine --- - $cuisineM = [regex]::Match($html, 'CUISINE:\s*([A-Z][A-Za-z/ ]+?)(?:\s*</|\s*<)') - $cuisine = '' - if ($cuisineM.Success) { - $c = $cuisineM.Groups[1].Value.Trim() - $cuisine = (Get-Culture).TextInfo.ToTitleCase($c.ToLower()) - } - - # --- Phone --- - $phoneM = [regex]::Match($html, '\((?:208|509)\) \d{3}-\d{4}') - $phone = if ($phoneM.Success) { $phoneM.Value } else { '' } - - # --- Area(s) --- - $areas = [System.Collections.ArrayList]@() - $htmlUpper = $html.ToUpper() - foreach ($aKey in $areaMap.Keys) { - if ($htmlUpper.Contains($aKey)) { - $null = $areas.Add($areaMap[$aKey]) - } - } - $areas = @($areas | Select-Object -Unique) - if ($areas.Count -eq 0) { $areas = @('Downtown') } - - # --- Hours --- - $hoursM = [regex]::Match($html, 'Menu served [^<]+') - $hours = if ($hoursM.Success) { $hoursM.Value.Trim() } else { '' } - - # --- Menu Courses --- - $fc = Extract-CourseBlock $html 'First Course' 'Second Course' - $sc = Extract-CourseBlock $html 'Second Course' 'Third Course' - $tc = Extract-CourseBlock $html 'Third Course' $null - - $firstCourse = Extract-Dishes $fc - $secondCourse = Extract-Dishes $sc - $thirdCourse = Extract-Dishes $tc - - $fc1count = $firstCourse.Count - $fc2count = $secondCourse.Count - $fc3count = $thirdCourse.Count - Write-Host " -> $name [$price] $fc1count/$fc2count/$fc3count courses" - - $null = $restaurants.Add([PSCustomObject]@{ - name = $name - slug = $slug - price = $price - areas = $areas - cuisine = $cuisine - url = "https://inlanderrestaurantweek.com/project/$slug/" - menu = [PSCustomObject]@{ - hours = $hours - phone = $phone - courses = [PSCustomObject]@{ - 
'First Course' = @($firstCourse) - 'Second Course' = @($secondCourse) - 'Third Course' = @($thirdCourse) - } - } - }) - - } catch { - Write-Host " ERROR: $_" - $null = $restaurants.Add([PSCustomObject]@{ - name = $slug - slug = $slug - price = 0 - areas = @('Downtown') - cuisine = '' - url = "https://inlanderrestaurantweek.com/project/$slug/" - menu = [PSCustomObject]@{ - hours = 'FETCH_ERROR' - phone = '' - courses = [PSCustomObject]@{ - 'First Course' = @() - 'Second Course' = @() - 'Third Course' = @() - } - } - }) - } - - Start-Sleep -Milliseconds 500 -} - -$outPath = 'C:\Users\derekc.CHNSLocal\AppData\Local\Temp\2025-restaurants.json' -$json = $restaurants | ConvertTo-Json -Depth 10 -[System.IO.File]::WriteAllText($outPath, $json, [System.Text.Encoding]::UTF8) -Write-Host "" -Write-Host "Done! Saved $($restaurants.Count) restaurants to $outPath" diff --git a/test-parse.ps1 b/test-parse.ps1 deleted file mode 100644 index c78e32e..0000000 --- a/test-parse.ps1 +++ /dev/null @@ -1,119 +0,0 @@ -# Test parsing on local 315cuisine HTML - -$html = [System.IO.File]::ReadAllText('C:\Users\DEREKC~1.CHN\AppData\Local\Temp\test-restaurant.html', [System.Text.Encoding]::UTF8) - -$areaMap = [ordered]@{ - "AIRWAY HEIGHTS" = "Airway Heights" - "ATHOL" = "Athol" - "COEUR D'ALENE" = "Coeur d'Alene" - "POST FALLS" = "Post Falls" - "HAYDEN" = "Hayden" - "LIBERTY LAKE" = "Liberty Lake" - "NORTH SPOKANE" = "North Spokane" - "SOUTH SPOKANE" = "South Spokane" - "SPOKANE VALLEY" = "Spokane Valley" - "WEST SPOKANE" = "West Spokane" - "WORLEY" = "Worley" - "DOWNTOWN" = "Downtown" -} - -function Get-CleanText($rawHtml) { - $t = $rawHtml -replace '<[^>]+>', ' ' - $t = $t -replace '&', '&' - $t = $t -replace '<', '<' - $t = $t -replace '>', '>' - $t = $t -replace '"', '"' - $t = $t -replace ''', "'" - $t = $t -replace ' ', ' ' - $t = $t -replace '\s+', ' ' - $t.Trim() -} - -function Extract-Dishes($courseHtml) { - $dishes = [System.Collections.ArrayList]@() - $opts = 
[System.Text.RegularExpressions.RegexOptions]::Singleline - - $pMatches = [regex]::Matches($courseHtml, '<p[^>]*>(.*?)</p>', $opts) - - foreach ($pm in $pMatches) { - $pContent = $pm.Groups[1].Value - if ($pContent -notmatch '<strong>') { continue } - - $nameM = [regex]::Match($pContent, '<strong>(.*?)</strong>', $opts) - if (-not $nameM.Success) { continue } - $name = Get-CleanText $nameM.Groups[1].Value - - if ($name -match '^(GF|GFA|V\+?|DF|V:)$') { continue } - if ($name.Length -lt 3) { continue } - if ($name -match '^[A-Z]{1,3}:') { continue } - if ($name.Length -gt 80) { continue } - - $afterBr = '' - if ($pContent -match '<br\s*/?>(.*?)$') { - $afterBr = $matches[1] - } else { - $afterStrong = [regex]::Match($pContent, '</strong>(.*?)$', $opts) - if ($afterStrong.Success) { $afterBr = $afterStrong.Groups[1].Value } - } - - $desc = Get-CleanText $afterBr - $null = $dishes.Add([PSCustomObject]@{ name = $name; desc = $desc }) - } - - return ,$dishes -} - -function Extract-CourseBlock($html, $courseLabel) { - $opts = [System.Text.RegularExpressions.RegexOptions]::Singleline - $pattern = [regex]::Escape($courseLabel) + '.{0,300}?et_pb_text_inner">(.+?)(?=<h[123]|et_pb_text_inner"><h|V:\s*<|Vegetarian item|et_pb_row_inner_[23])' - $m = [regex]::Match($html, $pattern, $opts) - if ($m.Success) { return $m.Groups[1].Value } - - $idx = $html.IndexOf($courseLabel) - if ($idx -ge 0) { - $sub = $html.Substring($idx, [Math]::Min(4000, $html.Length - $idx)) - $innerM = [regex]::Match($sub, 'et_pb_text_inner">(.*?)(?=et_pb_text_inner|</div></div></div>)', $opts) - if ($innerM.Success) { return $innerM.Groups[1].Value } - } - return '' -} - -$nameM = [regex]::Match($html, '<title>(.+?) 
\| Inlander') -Write-Host "Name: $($nameM.Groups[1].Value.Trim())" - -$priceM = [regex]::Match($html, '<strong>\$(\d+)</strong>') -Write-Host "Price: $($priceM.Groups[1].Value)" - -$cuisineM = [regex]::Match($html, 'CUISINE:\s*([A-Z][A-Za-z /]+?)(?:\s*</|\s*<)') -$cuisine = (Get-Culture).TextInfo.ToTitleCase($cuisineM.Groups[1].Value.Trim().ToLower()) -Write-Host "Cuisine: $cuisine" - -$phoneM = [regex]::Match($html, '\((?:208|509)\) \d{3}-\d{4}') -Write-Host "Phone: $($phoneM.Value)" - -$hoursM = [regex]::Match($html, 'Menu served [^<]+') -Write-Host "Hours: $($hoursM.Value.Trim())" - -$areas = @() -foreach ($aKey in $areaMap.Keys) { - if ($html.ToUpper().Contains($aKey)) { $areas += $areaMap[$aKey] } -} -Write-Host "Areas: $($areas -join ', ')" - -Write-Host "" -Write-Host "--- FIRST COURSE ---" -$fc = Extract-CourseBlock $html 'First Course' -$firstCourse = Extract-Dishes $fc -foreach ($d in $firstCourse) { Write-Host " [$($d.name)] | $($d.desc)" } - -Write-Host "" -Write-Host "--- SECOND COURSE ---" -$sc = Extract-CourseBlock $html 'Second Course' -$secondCourse = Extract-Dishes $sc -foreach ($d in $secondCourse) { Write-Host " [$($d.name)] | $($d.desc)" } - -Write-Host "" -Write-Host "--- THIRD COURSE ---" -$tc = Extract-CourseBlock $html 'Third Course' -$thirdCourse = Extract-Dishes $tc -foreach ($d in $thirdCourse) { Write-Host " [$($d.name)] | $($d.desc)" }