Python Requests: How to get value of Blank Hidden Input

Question

I'm trying to scrape this site: https://case.occ.ok.gov/ords/f?p=1004:203

The missing piece of the puzzle is figuring out how to "get" the p_request parameter for the data payload before making the final request. This field is empty in the HTML of the "main" page, so I cannot read it from there and pass it through to my POST request.

The code below doesn't work because I have a blank p_request parameter in the payload, although I know through testing with developer console that it will work if I am able to get the p_request field.

# Load the search page once: its cookies and hidden <input> values are
# reused to assemble the AJAX payload below.
url = 'https://case.occ.ok.gov/ords/f?p=1004:203'
r = requests.get(url)
soup = BeautifulSoup(r.text, 'lxml')

# Copy only the two cookies that are forwarded with the POST.
cookies = {
    cookie_name: r.cookies.get(cookie_name)
    for cookie_name in ('ORA_WWV_APP_1004', 'X-Oracle-BMC-LBS-Route')
}

# Index every <input> on the page by its id (missing value -> '').
d_inputs = {tag['id']: tag.get('value', '') for tag in soup.select('input')}

# p_json is filled positionally with: last-name search value, the page's
# "protected" token, and its salt.
# NOTE: `letter` is not defined anywhere in this snippet; it must be set
# by the surrounding code before this runs.
p_json_template = '{"pageItems":{"itemsToSubmit":[{"n":"P203_LASTNAME","v":"%s"},{"n":"P203_FIRSTNAME","v":""},{"n":"P203_SEARCH_CRITERIA","v":"1"}],"protected":"%s","rowVersion":"","formRegionChecksums":[]},"salt":"%s"}'
p_json = p_json_template % (
    letter,
    d_inputs['pPageItemsProtected'],
    d_inputs['pSalt'],
)

# A list of pairs (not a dict) because 'f01' and 'f02' repeat.
data = [
    ('p_flow_id', '1004'),
    ('p_flow_step_id', '203'),
    ('p_instance', '%s' % d_inputs['pInstance']),
    ('p_debug', ''),
    ('p_request', ''),  # left blank: this is the value the question asks how to obtain
    ('p_widget_name', 'worksheet'),
    ('p_widget_mod', 'PULL'),
    ('p_widget_action', ''),
    ('p_widget_num_return', '100000'),
    ('x01', '8980043036046866'),
    ('x02', '8985720770049096'),
    ('f01', 'R8980010866046866_column_search_current_column'),
    ('f01', 'R8980010866046866_search_field'),
    ('f01', 'R8980010866046866_row_select'),
    ('f02', ''),
    ('f02', ''),
    ('f02', '50'),
    ('p_json', p_json),
]

# Submit the payload and dump the raw response body.
r = requests.post(
    'https://case.occ.ok.gov/ords/wwv_flow.ajax',
    cookies=cookies,
    data=data,
)
print(r.text)

In developer console, I see this field appears when making the type of submission I want, even though it is blank in the main page:

How do I "retrieve" this field, which is necessary for the request to work?

Владислав Небеснюк · Accepted Answer

This works for me:

import requests
import json
from bs4 import BeautifulSoup

# globals
users = []    # filled by the row-parsing step that follows this section
letter = "A"  # value submitted as the P203_LASTNAME search item

# session: one Session so cookies set by the first GET are sent with
# every later request
session = requests.Session()

# get page
auth = session.get('https://case.occ.ok.gov/ords/f?p=1004:203')
auth.raise_for_status()  # fail here rather than on a missing key below
soup = BeautifulSoup(auth.text, 'html.parser')

# Map each <input> id to its value. Inputs without an id attribute are
# skipped: indexing them with ['id'] would raise KeyError.
inputs = soup.select('input')
d_inputs = {
    i['id']: i.get('value', '') for i in inputs if i.has_attr('id')
}

# create params: hidden page fields echoed back, plus the search items
params = {
    'p_flow_id': d_inputs['pFlowId'],
    'p_flow_step_id': d_inputs['pFlowStepId'],
    'p_instance': d_inputs['pInstance'],
    'p_debug': '',
    'p_request': 'Search',
    'p_reload_on_submit': d_inputs['pReloadOnSubmit'],
    'p_page_submission_id': d_inputs['pPageSubmissionId'],
    'p_json': json.dumps({
        "pageItems": {
            "itemsToSubmit": [
                {"n": "P203_LASTNAME", "v": str(letter)},
                {"n": "P203_FIRSTNAME", "v": ""},
                {"n": "P203_SEARCH_CRITERIA", "v": "1"},
            ],
            "protected": d_inputs['pPageItemsProtected'],
            "rowVersion": "",
            "formRegionChecksums": [],
        },
        "salt": d_inputs['pSalt'],
    }),
}

# Send request to APEX (submits the search form)
submit_response = session.post(
    'https://case.occ.ok.gov/ords/wwv_flow.accept', data=params
)
submit_response.raise_for_status()

# get page with data (first): same page, addressed with the session's
# pInstance value
data_page = session.get(
    'https://case.occ.ok.gov/ords/f?p=1004:203:{}::NO:::'.format(
        d_inputs['pInstance']
    )
)
data_page.raise_for_status()

table_soup = BeautifulSoup(data_page.text, 'html.parser')

# new params: re-read the inputs from the result page (id-less inputs
# skipped, as above)
inputs = table_soup.select('input')
d_inputs = {
    i['id']: i.get('value', '') for i in inputs if i.has_attr('id')
}

# The result page contains an `interactiveReport({...});})();` call; the
# text between those two markers is parsed as JSON to obtain
# 'ajaxIdentifier'.
json_ajax_data = json.loads(data_page.text.split(
    'interactiveReport('
)[1].split(');})();')[0])

# get data for next pages
params_news = {
    'p_flow_id': params['p_flow_id'],
    'p_flow_step_id': params['p_flow_step_id'],
    'p_instance': params['p_instance'],
    'p_debug': '',
    'p_request': 'PLUGIN={}'.format(json_ajax_data['ajaxIdentifier']),
    'p_widget_name': 'worksheet',
    'p_widget_mod': 'ACTION',
    'p_widget_action': 'PAGE',
    # presumably requests 50 rows starting at row 51 (i.e. page 2) --
    # TODO confirm against the browser's network log
    'p_widget_action_mod': 'pgR_min_row=51max_rows=50rows_fetched=50',
    'p_widget_num_return': 50,
    'x01': d_inputs['R8980010866046866_worksheet_id'],
    'x02': d_inputs['R8980010866046866_worksheet_id'],
    'p_json': params['p_json'],
}

# get next page data
next_page = session.post(
    'https://case.occ.ok.gov/ords/wwv_flow.ajax', data=params_news
)
next_page.raise_for_status()

# Parse the AJAX response for the requested page. The rows must be read
# from this soup: reading them from `table_soup` would just re-parse the
# first result page and discard the response fetched above.
next_page_soup = BeautifulSoup(next_page.text, 'html.parser')
next_page_table_with_data = next_page_soup.find(
    'table', {'class': 'a-IRR-table'}
)
if next_page_table_with_data is None:
    # Explicit failure instead of an AttributeError on None below.
    raise RuntimeError(
        "No table with class 'a-IRR-table' found in the next-page response"
    )
next_page_rows = next_page_table_with_data.find_all('tr')

# parse rows
# NOTE(review): the search is by P203_LASTNAME, so column 0 looks like the
# last name and column 1 the first name, i.e. the 'name'/'surname' keys may
# be swapped. Kept as-is so the output format is unchanged -- confirm.
for row_next_page in next_page_rows:
    cells_next_page = row_next_page.find_all('td')
    # Header rows have no <td>; also skip rows too short to index [1].
    if len(cells_next_page) >= 2:
        users.append(
            {
                'name': cells_next_page[0].text,
                'surname': cells_next_page[1].text,
            }
        )

print(users)

[
   {'name': 'ANDERSON', 'surname': 'MICHAEL L AND KAREN'}, 
   {'name': 'ALVAREZ', 'surname': 'PETRA'},
   ...
]

Python Requests: How to get value of Blank Hidden Input

Answers (1)

Related Questions