SMTH
SMTH

Reputation: 95

Can't produce a result without hardcoding cookies within the headers of the following script

I'm trying to scrape the zestimate for this address, 1205 Clover St, Accokeek, MD 20607, from this webpage using the requests module. When I enter that address in the input box of that webpage, I get the zpid 37374749. When I put that zpid in the params and issue a POST request, I get the zestimate.

The problem is that the script works only when I hardcode cookies copied from dev tools into the request headers; otherwise, it fails miserably and throws a JSONDecodeError.

import requests
from pprint import pprint
from bs4 import BeautifulSoup

# Zillow GraphQL endpoint that the script POSTs its query to.
url = 'https://www.zillow.com/graphql/'

# Request headers, assembled from ordered (name, value) pairs: a browser-like
# User-Agent, the referring page, and the hardcoded `_px3` cookie.
headers = dict([
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'),
    ('referer', 'https://www.zillow.com/how-much-is-my-home-worth/'),
    ('cookie', '_px3=00d7bf4fc2e331a793b7ebf9f9e8059013b86b39ead67dfc15fd89fe4e1b459c:GrviXjUIrsA7lYSeSBB8Y8yQedr9ng0ByK7xkaB6CLDFCGYjxBL2r8dPy6MwnUk0Eqv75g0t1fcIa4c8ksIHoA==:1000:1qeUILwk/42tR6FaZ0ybaI63rAWDBAc7fsAeLcH02MyzXKEEoT0SDnk/zMCLA3Zj1BpBmuqO/4RQDntxiOgtsCGPF1VuxfpmXAeMPcxegynJ/PGXWYE3OLIa9vJ9XkVkGaSoH9knD3Ls7nHyuP/0DhapZbThlzDLoQ//Vflzi0eGv12lCSX/2msW+rCeFgO0RhsBMNSN93EUhmuMBXcBMg==;'),
])
# JSON body of the GraphQL POST: the operation name, its variables (the zpid
# of the property to look up) and the query document itself.
#
# The query is written as one triple-quoted literal. The original single-line
# literal had been broken across two physical lines, which is a syntax error;
# this form evaluates to the same text (the leading backslash suppresses the
# first newline) and keeps the GraphQL readable.
params = {
    "operationName": "HowMuchIsMyHomeWorthReviewQuery",
    "variables": {"zpid": 37374749},
    "query": """\
query HowMuchIsMyHomeWorthReviewQuery($zpid: ID!) {
  property(zpid: $zpid) {
    streetAddress
    city
    state
    zipcode
    bedrooms
    bathrooms
    livingArea
    zestimate
    homeStatus
    photos(size: XL) {
      url
      __typename
    }
    ...OmpHomeWorthUpsell_property
    isConfirmedClaimedByCurrentSignedInUser
    isVerifiedClaimedByCurrentSignedInUser
    ...UARequiredPropertyDimensions_property
    ...ContactAgentForm_property
    ...HomeInfo_property
    __typename
  }
  viewer {
    ...ContactAgentForm_viewer
    __typename
  }
  abTests {
    ...OmpHomeWorthUpsell_abTests
    ...UARequiredPropertyDimensions_abTests
    ...ContactAgentForm_abTests
    __typename
  }
}

fragment OmpHomeWorthUpsell_property on Property {
  zpid
  onsiteMessage(placementNames: ["HMIMHWTopSlot"]) {
    ...onsiteMessage_fragment
    __typename
  }
  __typename
}

fragment onsiteMessage_fragment on OnsiteMessageResultType {
  eventId
  decisionContext
  messages {
    skipDisplayReason
    shouldDisplay
    isGlobalHoldout
    isPlacementHoldout
    placementName
    testPhase
    bucket
    placementId
    passThrottle
    lastModified
    eventId
    decisionContext
    selectedTreatment {
      id
      name
      component
      status
      renderingProps
      lastModified
      __typename
    }
    qualifiedTreatments {
      id
      name
      status
      lastModified
      __typename
    }
    __typename
  }
  __typename
}

fragment OmpHomeWorthUpsell_abTests on ABTests {
  HMIMHW_ZO_NFS_UPSELL_ONSITE_MESSAGING: abTest(
    trial: "HMIMHW_ZO_NFS_UPSELL_ONSITE_MESSAGING"
  )
  __typename
}

fragment UARequiredPropertyDimensions_property on Property {
  currency
  featuredListingTypeDimension
  hasPublicVideo
  hdpTypeDimension
  listingTypeDimension
  price
  propertyTypeDimension
  standingOffer {
    isStandingOfferEligible
    __typename
  }
  zpid
  isZillowOwned
  zillowOfferMarket {
    legacyName
    __typename
  }
  ...ShouldShowVideo_property
  __typename
}

fragment ShouldShowVideo_property on Property {
  homeStatus
  isZillowOwned
  hasPublicVideo
  primaryPublicVideo {
    sources {
      src
      __typename
    }
    __typename
  }
  richMediaVideos {
    mp4Url
    hlsUrl
    __typename
  }
  __typename
}

fragment UARequiredPropertyDimensions_abTests on ABTests {
  ZO_HDP_HOUR_ONE_VIDEO: abTest(trial: "ZO_HDP_HOUR_ONE_VIDEO")
  __typename
}

fragment ContactAgentForm_property on Property {
  streetAddress
  state
  city
  zipcode
  zpid
  homeStatus
  homeType
  zestimate
  homeType
  isInstantOfferEnabled
  zillowOfferMarket {
    name
    code
    __typename
  }
  __typename
}

fragment ContactAgentForm_viewer on Viewer {
  name
  email
  zuid
  __typename
}

fragment ContactAgentForm_abTests on ABTests {
  SHOW_PL_LEAD_FORM: abTest(trial: "SHOW_PL_LEAD_FORM")
  __typename
}

fragment HomeInfo_property on Property {
  streetAddress
  city
  state
  zipcode
  bedrooms
  bathrooms
  livingArea
  homeStatus
  homeType
  contingentListingType
  photos(size: XL) {
    url
    __typename
  }
  listing_sub_type {
    is_newHome
    is_FSBO
    is_bankOwned
    is_foreclosure
    is_forAuction
    is_comingSoon
    __typename
  }
  __typename
}
""",
}

# Send the GraphQL query through a session that carries the custom headers
# and print the zestimate from the JSON response.
with requests.Session() as s:
    s.headers.update(headers)
    # requests waits indefinitely by default; bound the wait so a stalled
    # connection cannot hang the script.
    res = s.post(url, json=params, timeout=30)
    pprint(res.json()['data']['property']['zestimate'])

How can I find success without hardcoding cookies within the headers?

Upvotes: 2

Views: 125

Answers (1)

Andrej Kesely
Andrej Kesely

Reputation: 195528

To get the zpid dynamically, you first have to submit a request containing the address. Then use the returned zpid in the second request:

import requests

# Address-autocomplete endpoint; used to turn a street address into a zpid.
api_url = "https://www.zillowstatic.com/autocomplete/v3/suggestions/"
# GraphQL endpoint that returns the property data, including the zestimate.
graphql_url = "https://www.zillow.com/graphql/"

# Browser-like headers sent with every request of the session.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
    "referer": "https://www.zillow.com/how-much-is-my-home-worth/",
}

# Seconds to wait for each HTTP response; requests has no timeout by default.
REQUEST_TIMEOUT = 30

# GraphQL document sent with the zestimate request. It is one triple-quoted
# literal: the original single-line literal had been broken across two
# physical lines, which is a syntax error. The leading backslash suppresses
# the first newline, so the text is the same as the original string.
ZESTIMATE_QUERY = """\
query HowMuchIsMyHomeWorthReviewQuery($zpid: ID!) {
  property(zpid: $zpid) {
    streetAddress
    city
    state
    zipcode
    bedrooms
    bathrooms
    livingArea
    zestimate
    homeStatus
    photos(size: XL) {
      url
      __typename
    }
    ...OmpHomeWorthUpsell_property
    isConfirmedClaimedByCurrentSignedInUser
    isVerifiedClaimedByCurrentSignedInUser
    ...UARequiredPropertyDimensions_property
    ...ContactAgentForm_property
    ...HomeInfo_property
    __typename
  }
  viewer {
    ...ContactAgentForm_viewer
    __typename
  }
  abTests {
    ...OmpHomeWorthUpsell_abTests
    ...UARequiredPropertyDimensions_abTests
    ...ContactAgentForm_abTests
    __typename
  }
}

fragment OmpHomeWorthUpsell_property on Property {
  zpid
  onsiteMessage(placementNames: ["HMIMHWTopSlot"]) {
    ...onsiteMessage_fragment
    __typename
  }
  __typename
}

fragment onsiteMessage_fragment on OnsiteMessageResultType {
  eventId
  decisionContext
  messages {
    skipDisplayReason
    shouldDisplay
    isGlobalHoldout
    isPlacementHoldout
    placementName
    testPhase
    bucket
    placementId
    passThrottle
    lastModified
    eventId
    decisionContext
    selectedTreatment {
      id
      name
      component
      status
      renderingProps
      lastModified
      __typename
    }
    qualifiedTreatments {
      id
      name
      status
      lastModified
      __typename
    }
    __typename
  }
  __typename
}

fragment OmpHomeWorthUpsell_abTests on ABTests {
  HMIMHW_ZO_NFS_UPSELL_ONSITE_MESSAGING: abTest(
    trial: "HMIMHW_ZO_NFS_UPSELL_ONSITE_MESSAGING"
  )
  __typename
}

fragment UARequiredPropertyDimensions_property on Property {
  currency
  featuredListingTypeDimension
  hasPublicVideo
  hdpTypeDimension
  listingTypeDimension
  price
  propertyTypeDimension
  standingOffer {
    isStandingOfferEligible
    __typename
  }
  zpid
  isZillowOwned
  zillowOfferMarket {
    legacyName
    __typename
  }
  ...ShouldShowVideo_property
  __typename
}

fragment ShouldShowVideo_property on Property {
  homeStatus
  isZillowOwned
  hasPublicVideo
  primaryPublicVideo {
    sources {
      src
      __typename
    }
    __typename
  }
  richMediaVideos {
    mp4Url
    hlsUrl
    __typename
  }
  __typename
}

fragment UARequiredPropertyDimensions_abTests on ABTests {
  ZO_HDP_HOUR_ONE_VIDEO: abTest(trial: "ZO_HDP_HOUR_ONE_VIDEO")
  __typename
}

fragment ContactAgentForm_property on Property {
  streetAddress
  state
  city
  zipcode
  zpid
  homeStatus
  homeType
  zestimate
  homeType
  isInstantOfferEnabled
  zillowOfferMarket {
    name
    code
    __typename
  }
  __typename
}

fragment ContactAgentForm_viewer on Viewer {
  name
  email
  zuid
  __typename
}

fragment ContactAgentForm_abTests on ABTests {
  SHOW_PL_LEAD_FORM: abTest(trial: "SHOW_PL_LEAD_FORM")
  __typename
}

fragment HomeInfo_property on Property {
  streetAddress
  city
  state
  zipcode
  bedrooms
  bathrooms
  livingArea
  homeStatus
  homeType
  contingentListingType
  photos(size: XL) {
    url
    __typename
  }
  listing_sub_type {
    is_newHome
    is_FSBO
    is_bankOwned
    is_foreclosure
    is_forAuction
    is_comingSoon
    __typename
  }
  __typename
}
"""


def build_payload(zpid):
    """Return the JSON body of the GraphQL request for the property *zpid*."""
    return {
        "operationName": "HowMuchIsMyHomeWorthReviewQuery",
        "query": ZESTIMATE_QUERY,
        "variables": {"zpid": zpid},
    }


def first_zpid(suggestions):
    """Return the zpid of the first result in an autocomplete response.

    Args:
        suggestions: Decoded JSON of the autocomplete response; the zpid is
            read from ``suggestions["results"][0]["metaData"]["zpid"]``.

    Raises:
        LookupError: If there are no results or the first one has no zpid.
    """
    results = suggestions.get("results") or []
    if not results:
        raise LookupError("autocomplete returned no results for the address")
    try:
        return results[0]["metaData"]["zpid"]
    except KeyError as err:
        raise LookupError("first autocomplete result carries no zpid") from err


def fetch_zestimate(session, address):
    """Look up *address* and return the zestimate of the first match.

    Args:
        session: A ``requests`` session (or compatible object) used for both
            HTTP calls.
        address: Free-text street address passed to the autocomplete endpoint.

    Raises:
        requests.HTTPError: If either endpoint answers with a 4xx/5xx status.
        LookupError: If the address does not resolve to a zpid.
    """
    params = {
        "q": address,
        "resultTypes": "allAddress",
        "resultCount": "5",
    }
    res = session.get(api_url, params=params, timeout=REQUEST_TIMEOUT)
    # Check the status first so an error page surfaces as an HTTPError
    # instead of a JSON decoding failure.
    res.raise_for_status()
    zpid = first_zpid(res.json())

    res = session.post(
        graphql_url, json=build_payload(zpid), timeout=REQUEST_TIMEOUT
    )
    res.raise_for_status()
    return res.json()["data"]["property"]["zestimate"]


if __name__ == "__main__":
    with requests.Session() as s:
        s.headers.update(headers)
        print(fetch_zestimate(s, "1205 Clover St, Accokeek, MD 20607"))

Prints:

444700

Upvotes: 2

Related Questions