Kuldeep Avsar
Kuldeep Avsar

Reputation: 13

Connection Refused while scraping the data

I am trying to Scrape the Job Titles one by one from the Indeed.co.in website but it through me connection refused problem while I am visiting to the particular jobs Title categories page and trying to take response back from the page but It's shows error on that time. please help me out to solve this problem i am tried to solve this but this not solved. Please help.

2020/03/07 09:08:41 Error to Connect with Indeed Jobs Category Page. Get https://indeed.co.in/browsejobs/Engineering: dial tcp 169.44.165.69:443: connect: connection refused
package main
import (
    "crypto/tls"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"

    "github.com/PuerkitoBio/goquery"
)
func GetBrowseJobs(Url string) {
    response, err := http.Get(Url)
    if err != nil {
        log.Println("Error to Connect with Indeed Home page.", err)
        return
    }
    defer response.Body.Close()
    document, err := goquery.NewDocumentFromReader(response.Body)
    if err != nil {
        log.Fatal("Error loading HTTP response body", err.Error())
        return
    }
    document.Find("a.icl-GlobalFooter-link").Each(processElement)
}
func processElement(index int, element *goquery.Selection) {
    href, exists := element.Attr("href")
    if exists {
        BrowseJobsPage(href)
        return
    }
}
func BrowseJobsPage(Urls string) {
    fmt.Println(Urls)
    response, err := http.Get(Urls)
    if err != nil {
        log.Println("Error to Connect with Indeed Browse Jobs Page.", err)
        return
    }
    defer response.Body.Close()
    document, err := goquery.NewDocumentFromReader(response.Body)
    if err != nil {
        log.Fatal("Error loading HTTP response body", err.Error())
        return
    }
    document.Find("table#categories tbody tr td a").Each(Processjobs)
    fmt.Println("***********************************************************************")
}
func Processjobs(index int, element *goquery.Selection) {
    href, exists := element.Attr("href")
    if exists {
        PerJobsTitlePage(href)
        return
    }
}

func PerJobsTitlePage(Urls string) {
    fmt.Println(Urls)
    tlsConfig := &tls.Config{
        InsecureSkipVerify: true,
    }
    transport := &http.Transport{
        TLSClientConfig: tlsConfig,
    }
    client := http.Client{Transport: transport}
    response, err := client.Get("https://indeed.co.in" + Urls)
    if err != nil {
        log.Println("Error to Connect with Indeed Jobs Category Page.", err)
        return
    }
    defer response.Body.Close()
    body, err := ioutil.ReadAll(response.Body)
    if err != nil {
        log.Println("Page response is nil", nil)
    }
    document, err := goquery.NewDocumentFromReader(response.Body)
    if err != nil {
        log.Fatal("Error loading HTTP response body", err.Error())
        return
    }
    document.Find("table#titles tbody tr td p.job a").Each(ProcessSinglejob)
    fmt.Println("***********************************************************************")
}
func ProcessSinglejob(index int, element *goquery.Selection) {
    href, exists := element.Attr("title")
    if exists {
        fmt.Println(href)
        return
    }
}
func main() {
    GetBrowseJobs("https://www.indeed.co.in/")
}

Upvotes: 0

Views: 296

Answers (1)

Sabyasachi Patra
Sabyasachi Patra

Reputation: 680

add www to this line

  response, err := client.Get("https://indeed.co.in" + Urls)

i.e.

  response, err := client.Get("https://www.indeed.co.in" + Urls)

Upvotes: 2

Related Questions