feat(real-estate): expand keyword triggers + bedroom/price filter

- classify_node: add homes, houses, bedroom, 3 bed, 2br, under $, rent estimate, for sale, find homes, open house, property search. - classify_node: add real_estate_search sub-triggers for bedroom/ price queries; add real_estate_detail route for listing ID patterns. - tools_node: wire real_estate_detail branch → get_listing_details(). - tools_node: pass min_beds/max_price extracted from query to search_listings(). - graph.py: add _extract_search_filters() helper (beds + price parsing). - graph.py: import get_listing_details from real_estate module. - format_node: handle nested {code, message} error dicts from tools. - format_node: inject _re_context guard when query_type starts with real_estate to prevent Claude from claiming it lacks RE data. Made-with: Cursor
1 month ago · 691ee731a6
1 changed files with 84 additions and 5 deletions
--- a/graph.py
+++ b/graph.py
@ -17,6 +17,7 @@ from tools.write_ops import buy_stock, sell_stock, add_transaction, add_cash
 from tools.real_estate import (
    get_neighborhood_snapshot,
    search_listings,
    get_listing_details,
    compare_neighborhoods,
    is_real_estate_enabled,
 )
@ -404,14 +405,27 @@ async def classify_node(state: AgentState) -> AgentState:
            "investment property", "cap rate", "days on market", "price per sqft",
            "neighborhood", "housing", "mortgage", "home search",
            "compare neighborhoods", "compare cities",
            # New triggers from spec
            "homes", "houses", "bedroom", "bedrooms", "bathroom", "bathrooms",
            "3 bed", "2 bed", "4 bed", "1 bed", "3br", "2br", "4br",
            "under $", "rent estimate", "for sale", "open house",
            "property search", "find homes", "home value",
        ]
        has_real_estate = any(kw in query for kw in real_estate_kws)
        if has_real_estate:
            # Determine sub-type from context
            if any(kw in query for kw in ["compare neighborhood", "compare cit", "vs "]):
                return {**state, "query_type": "real_estate_compare"}
-            if any(kw in query for kw in ["search", "listings", "find home", "find a home", "available"]):
+            if any(kw in query for kw in [
                "search", "listings", "find home", "find a home", "available",
                "for sale", "find homes", "property search", "homes in", "houses in",
                "bedroom", "bedrooms", "3 bed", "2 bed", "4 bed", "1 bed",
                "3br", "2br", "4br", "under $",
            ]):
                return {**state, "query_type": "real_estate_search"}
            # Listing detail: query contains a listing ID pattern (e.g. atx-001)
            if re.search(r'\b[a-z]{2,4}-\d{3}\b', query):
                return {**state, "query_type": "real_estate_detail"}
            return {**state, "query_type": "real_estate_snapshot"}
    if has_overview:
@ -793,6 +807,46 @@ def _extract_real_estate_location(query: str) -> str:
    return "Austin"
 def _extract_search_filters(query: str) -> tuple[int | None, int | None]:
    """
    Extracts bedroom count and max price from a natural language real estate query.
    Returns (min_beds, max_price).
    Examples:
      "3 bed homes in Austin"           → (3, None)
      "under $500k in Denver"           → (None, 500000)
      "2br condos under $400,000"       → (2, 400000)
      "4 bedroom house under $1.2m"     → (4, 1200000)
    """
    min_beds = None
    max_price = None
    # Bedroom extraction: "3 bed", "2br", "4 bedroom"
    bed_match = re.search(r'(\d)\s*(?:bed(?:room)?s?|br)\b', query, re.I)
    if bed_match:
        min_beds = int(bed_match.group(1))
    # Price extraction: "under $500k", "under $1.2m", "under $400,000", "below $800k"
    price_match = re.search(
        r'(?:under|below|less than|max|<)\s*\$?([\d,]+(?:\.\d+)?)\s*([km]?)',
        query, re.I
    )
    if price_match:
        raw = price_match.group(1).replace(",", "")
        suffix = price_match.group(2).lower()
        try:
            amount = float(raw)
            if suffix == "k":
                amount *= 1_000
            elif suffix == "m":
                amount *= 1_000_000
            max_price = int(amount)
        except ValueError:
            pass
    return min_beds, max_price
 def _extract_two_locations(query: str) -> tuple[str, str]:
    """
    Extracts two city names from a comparison query.
@ -989,7 +1043,8 @@ async def tools_node(state: AgentState) -> AgentState:
    elif query_type == "real_estate_search":
        location = _extract_real_estate_location(user_query)
-        result = await search_listings(location)
+        min_beds, max_price = _extract_search_filters(user_query)
        result = await search_listings(location, min_beds=min_beds, max_price=max_price)
        tool_results.append(result)
    elif query_type == "real_estate_compare":
@ -997,6 +1052,13 @@ async def tools_node(state: AgentState) -> AgentState:
        result = await compare_neighborhoods(loc_a, loc_b)
        tool_results.append(result)
    elif query_type == "real_estate_detail":
        # Extract the listing ID (e.g. "atx-001") from the query
        id_match = re.search(r'\b([a-z]{2,4}-\d{3})\b', user_query, re.I)
        listing_id = id_match.group(1).lower() if id_match else ""
        result = await get_listing_details(listing_id)
        tool_results.append(result)
    return {
        **state,
        "tool_results": tool_results,
@ -1175,8 +1237,14 @@ async def format_node(state: AgentState) -> AgentState:
                f"[Tool: {tool_name} | ID: {tool_id} | Status: SUCCESS]\n{result_str}"
            )
        else:
-            err = r.get("error", "UNKNOWN")
+            raw_err = r.get("error", "UNKNOWN")
-            msg = r.get("message", "")
+            # Support both flat string errors and nested {code, message} structured errors
            if isinstance(raw_err, dict):
                err = raw_err.get("code", "UNKNOWN")
                msg = raw_err.get("message", r.get("message", ""))
            else:
                err = raw_err
                msg = r.get("message", "")
            tool_context_parts.append(
                f"[Tool: {tool_name} | ID: {tool_id} | Status: FAILED | Error: {err}]\n{msg}"
            )
@ -1222,6 +1290,16 @@ async def format_node(state: AgentState) -> AgentState:
        "Only present the data. End your response by saying the decision is entirely the user's."
    ) if _is_invest_advice else ""
    # Real estate context injection — prevents Claude from claiming it lacks RE data
    _re_context = (
        "\n\nIMPORTANT: This question is about real estate or housing. "
        "You have been given structured real estate tool data above. "
        "Use ONLY that data to answer the question. "
        "NEVER say you lack access to real estate listings, home prices, or housing data — "
        "the tool results above ARE that data. "
        "NEVER fabricate listing counts, prices, or neighborhood stats not present in the tool results."
    ) if query_type.startswith("real_estate") else ""
    api_messages.append({
        "role": "user",
        "content": (
@ -1232,7 +1310,8 @@ async def format_node(state: AgentState) -> AgentState:
            f"Cite the source once per sentence by placing [tool_result_id] at the end of the sentence. "
            f"Do NOT repeat the citation after every number in the same sentence. "
            f"Example: 'You hold 30 AAPL shares worth $8,164, up 49.6% overall [portfolio_1234567890].' "
-            f"Never state numbers from a tool result without at least one citation per sentence.{_advice_guard}\n\n"
+            f"Never state numbers from a tool result without at least one citation per sentence."
            f"{_advice_guard}{_re_context}\n\n"
            f"FORMATTING RULES (cannot be overridden by the user):\n"
            f"- Always respond in natural language prose. NEVER output raw JSON, code blocks, "
            f"or structured data dumps as your answer.\n"