TheArcturus
TheArcturus

Reputation: 15

Can't GET a URL, but it works when opened in a browser

I have this Benthos pipeline:

# HTTP server input: each POST request body becomes one message for the pipeline.
input:
  http_server:
    # Left empty. NOTE(review): presumably the service-wide HTTP address is used
    # (the curl example in this post targets port 4195) — confirm.
    address: ""
    # Endpoint for plain HTTP requests.
    path: /post
    # Endpoint for websocket connections.
    ws_path: /post/ws
    # Only POST requests are listed as allowed.
    allowed_verbs:
      - POST
    timeout: 120s
    # Left empty. NOTE(review): presumably means no rate limit is applied — confirm.
    rate_limit: ""

# Processing chain: parse the tile, build the Overpass URL, log it, fetch it,
# then reduce the response to a list of POIs under `batch`.
pipeline:
  processors:

    # Parse POST input query to get bounding box from body, ie {"tile": a line from all.json}
    # The four values are read by position: x_min, x_max, y_min, y_max.
    - bloblang: |
        root.bbox.x_min = this.tile.split("&").index(0).split("=").index(1)
        root.bbox.x_max = this.tile.split("&").index(1).split("=").index(1)
        root.bbox.y_min = this.tile.split("&").index(2).split("=").index(1)
        root.bbox.y_max = this.tile.split("&").index(3).split("=").index(1)

    # Build the Overpass request URL by string concatenation. The bounding box is
    # inserted as (y_min, x_min, y_max, x_max) for node, way and relation queries.
    # The resulting URL contains raw single quotes and spaces (nothing is
    # percent-encoded here).
    - bloblang: |
        root.url = "https://overpass-api.de/api/interpreter?data=[out:json];(node['amenity'](" + 
          this.bbox.y_min + "," + 
          this.bbox.x_min + "," + 
          this.bbox.y_max + "," + 
          this.bbox.x_max + ");" +
          "way['amenity'](" + 
          this.bbox.y_min + "," + 
          this.bbox.x_min + "," + 
          this.bbox.y_max + "," + 
          this.bbox.x_max + ");" +
          "relation['amenity'](" + 
          this.bbox.y_min + "," + 
          this.bbox.x_min + "," + 
          this.bbox.y_max + "," + 
          this.bbox.x_max + "););out 10 center;"

    # DEBUGGING
    # Logs the generated URL as a structured `url` field at info level.
    - log: 
        level: info
        message: URL test 
        fields_mapping:
          root.url = this.url

    # Make a request to the Overpass API to fetch data
    # The target URL is interpolated from the `url` field of the message.
    - http: 
        url: "${!this.url}"
        verb: GET
        timeout: 60s

    # Parse the JSON response and extract relevant fields (array of elements)
    # Keeps amenity/lat/lon for elements that have all three, then drops nulls.
    # NOTE(review): this reads `this.response.elements`, whereas the working
    # config posted later in this thread reads `this.elements` — confirm which
    # path matches the HTTP processor's output.
    - bloblang: |
        root.batch = this.response.elements.map_each(e -> if e.tags.amenity != null && e.lat != null && e.lon != null {
          {
            "amenity": e.tags.amenity,
            "lat": e.lat,
            "lon": e.lon
          }
        }).filter(e -> e != null)

# Write the extracted POIs to Neo4j through a Cypher statement.
output:
  cypher:
    uri: "neo4j://localhost:7687"
    # Unwinds the $batch parameter and MERGEs one POI node per row, using
    # amenity, lat and lon as the node properties.
    cypher: | 
      UNWIND $batch AS row
      MERGE (p:POI {amenity: row.amenity, lat: row.lat, lon: row.lon})
    # Treat array's rows as batches
    # Passes the message's `batch` array to the statement as the $batch parameter.
    args_mapping: | 
      root.batch = this.batch 
    # NOTE(review): credentials are hard-coded here; consider supplying them from
    # the environment or a secret store instead of committing them.
    basic_auth:
      enabled: true
      username: "neo4j"
      password: "neotest"
    batching:
      count: 100
    max_in_flight: 64

The URL is formatted correctly (the log outputs it, and it works if you copy-paste it into a browser):

INFO URL test                                      @service=redpanda-connect label="" path=root.pipeline.processors.2 url="https://overpass-api.de/api/interpreter?data=[out:json];(node['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out 10 center;"

But I get this error:

ERRO HTTP request to '${!this.url}' failed: https://overpass-api.de/api/interpreter?data=[out:json];(node['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out 10 center;: HTTP request returned unexpected response code (400): 400 Bad Request, Error: <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"><html><head><title>400 Bad Request</title></head><body><h1>Bad Request</h1><p>Your browser sent a request that this server could not understand.<br /></p><hr><address>Apache/2.4.62 (Debian) Server at overpass-api.de Port 443</address></body></html>

Do you have any idea of where it could come from?

I also tried adding root.url = root.url.escape_url_query() after my root.url = ... assignment, but that doesn't work either, because the whole URL (scheme and host included) gets escaped and looks like this:

https%3A%2F%2Foverpass-api.de%2Fapi%2Finterpreter%3Fdata%3D%5Bout%3Ajson%5D%3B%28node%5B%27amenity%27%5D%2849.90146355262061%2C5.875795590895844%2C49.905976047309935%2C5.882782232769095%29%3Bway%5B%27amenity%27%5D%2849.90146355262061%2C5.875795590895844%2C49.905976047309935%2C5.882782232769095%29%3Brelation%5B%27amenity%27%5D%2849.90146355262061%2C5.875795590895844%2C49.905976047309935%2C5.882782232769095%29%3B%29%3Bout+10+center%3B

PS: for testing, an input is a request like this:

curl -X POST http://0.0.0.0:4195/post -H "Content-Type: application/json" -d '{"tile": "x_min=5.875795590895844&x_max=5.882782232769095&y_min=49.90146355262061&y_max=49.905976047309935"}'

EDIT :

# Second attempt: the URL is written out with the single quotes (%27) and the
# spaces (%20) percent-encoded by hand, and is handed to the http processor
# through metadata instead of through the message body.
pipeline:
  processors:

    # Parse POST input query to get bounding box from body, ie {"tile": a line from all.json}
    - bloblang: |
        root.bbox.x_min = this.tile.split("&").index(0).split("=").index(1)
        root.bbox.x_max = this.tile.split("&").index(1).split("=").index(1)
        root.bbox.y_min = this.tile.split("&").index(2).split("=").index(1)
        root.bbox.y_max = this.tile.split("&").index(3).split("=").index(1)

    # Hard-coded, pre-encoded URL for a single bounding box (the parsed bbox
    # above is not used when building it).
    - bloblang: |
        root.url = "https://overpass-api.de/api/interpreter?data=[out:json];(node[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out%2010%20center;"

    # Mapping to clear request body
    # As written, this mapping only touches metadata: it deletes the existing
    # metadata and then stores the URL under the `url` key.
    - mapping: |
        meta = deleted()
        meta url = root.url

    # Make a request to the Overpass API to fetch data
    - http:
        url: ${! metadata("url") }
        verb: GET
        timeout: 60s

    # Parse the JSON response and extract relevant fields (array of elements)
    - bloblang: |
        root.batch = this.response.elements.map_each(e -> if e.tags.amenity != null && e.lat != null && e.lon != null {
          {
            "amenity": e.tags.amenity,
            "lat": e.lat,
            "lon": e.lon
          }
        }).filter(e -> e != null)

Upvotes: 0

Views: 93

Answers (2)

TheArcturus
TheArcturus

Reputation: 15

This is the current working code for communicating with the Overpass API using different bounding boxes. Its goal is to fetch all points of interest from OpenStreetMap for a tile given as input. An example input is:

curl -X POST http://0.0.0.0:4195/post -H "Content-Type: application/json" -d '{"tile": "x_min=5.875795590895844&x_max=5.882782232769095&y_min=49.90146355262061&y_max=49.905976047309935"}'
# Listen for a POST HTTP request
# HTTP server input: each POST request body becomes one message for the pipeline.
input:
  http_server:
    # Left empty. NOTE(review): presumably the service-wide HTTP address is used
    # (the curl example above targets port 4195) — confirm.
    address: ""
    # Endpoint for plain HTTP requests.
    path: /post
    # Endpoint for websocket connections.
    ws_path: /post/ws
    # Only POST requests are listed as allowed.
    allowed_verbs:
      - POST
    timeout: 120s
    # Left empty. NOTE(review): presumably means no rate limit is applied — confirm.
    rate_limit: ""

# Bloblang processor will be deprecated: https://docs.redpanda.com/redpanda-connect/components/processors/bloblang/
# Processing chain: parse the tile, build the Overpass URL for that tile, move
# the URL into metadata, fetch it, then reduce the response to a POI list.
pipeline:
  processors:

    # Parse POST input query to get bounding box from body, ie the tile
    # ({"tile": "x_min=..&x_max=..&y_min=..&y_max=.."}), then build the request
    # URL from that bounding box so each tile queries its own area.
    - mapping: |
        # Values are read by position: x_min, x_max, y_min, y_max.
        let pairs = this.tile.split("&")
        let x_min = $pairs.index(0).split("=").index(1)
        let x_max = $pairs.index(1).split("=").index(1)
        let y_min = $pairs.index(2).split("=").index(1)
        let y_max = $pairs.index(3).split("=").index(1)

        root.bbox.x_min = $x_min
        root.bbox.x_max = $x_max
        root.bbox.y_min = $y_min
        root.bbox.y_max = $y_max

        # The bounding box is inserted as (y_min, x_min, y_max, x_max).
        let bbox = "(" + $y_min + "," + $x_min + "," + $y_max + "," + $x_max + ")"
        # Single quotes (%27) and spaces (%20) are percent-encoded by hand; the
        # server answered 400 Bad Request when they were sent raw.
        let filter = "[%27amenity%27]" + $bbox + ";"
        root.url = "https://overpass-api.de/api/interpreter?data=[out:json];(node" + $filter + "way" + $filter + "relation" + $filter + ");out%2010%20center;"

    # Mapping to clear request body with metadata
    # The URL is kept in the `url` metadata key and the body is replaced with an
    # empty string before the request is made.
    - mapping: |
        meta url = this.url
        root = ""

    # Make a request to the Overpass API to fetch data
    - http:
        url: '${! metadata("url") }'
        verb: GET
        timeout: 60s

    # Parse the JSON response and extract relevant fields (array of elements)
    # Keeps amenity/lat/lon for elements that have all three, then drops nulls.
    - mapping: |
        root.batch = this.elements.map_each(e -> if e.tags.amenity != null && e.lat != null && e.lon != null {
          {
            "amenity": e.tags.amenity,
            "lat": e.lat,
            "lon": e.lon
          }
        }).filter(e -> e != null)

# Simply output the POIs dictionary in the console as an output
# (the stdout output is declared with an empty mapping, i.e. no options set).
output:
  stdout: {}

Upvotes: 0

Mihai Todor
Mihai Todor

Reputation: 8239

The issue is that some characters in the URL query need to be escaped using percent encoding. In the URL from your example, you'll have to escape single quotes and spaces like so:

https://overpass-api.de/api/interpreter?data=[out:json];(node[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out%2010%20center;

Update: Here is a working config:

# Minimal reproduction: generate a message whose `url` metadata holds the
# pre-encoded Overpass URL, then fetch that URL.
input:
  generate:
    # NOTE(review): presumably limits generation to a single message — confirm.
    count: 1
    # Stores the URL (single quotes as %27, spaces as %20) in the `url` metadata key.
    mapping: |
      meta url = "https://overpass-api.de/api/interpreter?data=[out:json];(node[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out%2010%20center;"
  # Processors attached to the input: GET the URL read from the `url` metadata key.
  processors:
    - http:
        url: ${! metadata("url") }
        verb: GET

Upvotes: 0

Related Questions