sarac
sarac

Reputation: 67

Beautiful soup dictionary html parsing problem

I am trying to configure an existing add-on for the Anki program, using the AnkiWeb desktop tool.

The problem is that when I use a single-word query, it works properly. But if the query is more than one word, it doesn't work, because I have to split the words and add a + sign between them, like this:

https://www.oxfordlearnersdictionaries.com/definition/english/come-along?q=come+along

I am a complete beginner in Python and I have no idea how to get past this problem.

#-*- coding:utf-8 -*-
import random
from ..base import *

import requests
from bs4 import BeautifulSoup
from time import sleep
from random import randint

@register(u'Oxford_Article')
class Oxford_Article(WebService):
    """Web service that pulls an entry page from Oxford Learner's Dictionaries.

    The entry's HTML (the ``entryContent`` div) is exposed as a single
    exported field.
    """

    # NOTE(review): `init` is presumably `__init__` with its underscores lost
    # when the post was rendered -- confirm against the real add-on source.
    def init(self):
        super(Oxford_Article, self).init()

    def _get_from_api(self):
        """Fetch the definition page for ``self.quote_word`` and cache the result.

        Builds a dict with one ``'Article'`` key (initially an empty string),
        fills it with the serialized children of the ``entryContent`` div, and
        returns whatever ``self.cache_this`` returns for that dict.
        """
        # Random pause of 1-3 seconds before each request.
        sleep(randint(1,3))
        
        data = self.get_response("https://www.oxfordlearnersdictionaries.com/definition/english/{}".format(self.quote_word))
        soup = parse_html(data)
        result = {
            'Article': u'',
        }

        # Article
        element = soup.find('div', id='entryContent')
        # Remove <script> tags from the entry before it is serialized.
        # NOTE(review): element is dereferenced here, before the `if element:`
        # check below; if the div was not found this presumably fails on None.
        for s in element.select('script'):
            s.extract()
        if element:
            # Concatenate the string form of every child node of the div.
            result['Article'] = u''.join(str(e) for e in element.contents)

        return self.cache_this(result)

    @export([u'entryContent', u'Article definition'])
    def fld_definate(self):
        """Return the cached ``'Article'`` field."""
        return self._get_field('Article')

Upvotes: 0

Views: 148

Answers (1)

Abhilash
Abhilash

Reputation: 2256

  • The if element: check needs to come before the loop that calls element.select('script'); otherwise element may be None when it is first used.

  • self.quote_word from AnkiWeb is already encoded, adding + is unnecessary.

  • The URL you used is not a generic one, find an appropriate API that supports querying.

    # Check element first, so select() and contents are only used when it is set.
    if element:
        for s in element.select('script'):
            s.extract()
        result['Article'] = u''.join(str(e) for e in element.contents)

Working version of your add-on:

from ..base import *

@register(u'Oxford_Article')
class Oxford_Article(WebService):
    """Web service that looks a word up on Oxford Learner's Dictionaries.

    The query goes through the site's search URL (``?q=...``) and the HTML of
    the resulting ``entryContent`` div is exposed as a single exported field.
    """

    def _get_from_api(self, lang_id="en"):
        """Fetch the search result for ``self.quote_word`` and cache the article.

        Args:
            lang_id: Accepted for interface compatibility; not used here.

        Returns:
            Whatever ``self.cache_this`` returns for a dict whose ``'Article'``
            value is the serialized entry HTML, or an empty string when no
            ``entryContent`` div was found.
        """
        url = "https://www.oxfordlearnersdictionaries.com/search/english/?q={}".format(self.quote_word)
        soup = parse_html(self.get_response(url))
        result = {'Article': u''}

        # Article
        element = soup.find('div', id='entryContent')
        if element:
            # extract() is called only for its side effect (detaching the
            # <script> tag), so use a plain loop rather than a throwaway list.
            for script in element.select('script'):
                script.extract()
            result['Article'] = u''.join(str(e) for e in element.contents)
        return self.cache_this(result)

    @export('Article definition')
    def fld_definate(self):
        """Return the cached ``'Article'`` field."""
        return self._get_field('Article')

Output:

multiword queries

Upvotes: 1

Related Questions