Reputation: 763
I'm working with the GitHub API right now, and here's a function that fetches all pull requests for each repo in a list:
async def get_all_pulls(repos, api):
    pulls = []
    for repo in repos:
        try:
            async for pull in api.getiter(f'/repos/{org}/{repo}/pulls?state=all'):
                pull['repo'] = repo
                if pull not in pulls:
                    pulls.append(pull)
        except Exception:
            print(f"Bad repo/no access=> [{repo}]")
            continue
    return pulls
Everything works fine except for one little problem: it takes a lot of time because of the iteration over repos (let's say there are 30 of them).
I was trying to make it concurrent like this (of course, when using this I get rid of the for loop in the function itself):
# gather all prs for all repos
tasks = [asyncio.ensure_future(get_all_pulls_for_repo(api, repo)) for repo in repos]
results = await asyncio.gather(*tasks)
# unwrap list of lists
for res in results:
    all_pull_requests += res
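For reference, get_all_pulls_for_repo is roughly the body of the old loop, something like this (org is still a module-level variable):
async def get_all_pulls_for_repo(api, repo):
    # fetch all PRs for a single repo; meant to run as one task per repo
    pulls = []
    try:
        async for pull in api.getiter(f'/repos/{org}/{repo}/pulls?state=all'):
            pull['repo'] = repo
            pulls.append(pull)
    except Exception:
        print(f"Bad repo/no access=> [{repo}]")
    return pulls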
But it crashes with errors saying the repos are bad, etc. I think I'm missing something important here, but I can't figure out what.
Why does it crash with the async for loop? And how can I make it work?
UPDATE 1: Traceback from get_all_reviews:
Traceback (most recent call last):
  File "/home/metal/Documents/projects/-git/async_git_tool.py", line 193, in <module>
    loop.run_until_complete(main())
  File "/home/metal/.pyenv/versions/3.6.0/lib/python3.6/asyncio/base_events.py", line 466, in run_until_complete
    return future.result()
  File "/home/metal/Documents/projects/-git/async_git_tool.py", line 113, in main
    reviewed = await get_all_reviews(created, api, ss_programmers)
  File "/home/metal/Documents/projects/-git/async_git_tool.py", line 181, in get_all_reviews
    async for review in api.getiter(f'/repos/{org}/{pr_repo}/pulls/{pr_number}/reviews'):
  File "/home/metal/Documents/projects/-git/venv/lib/python3.6/site-packages/gidgethub/abc.py", line 85, in getiter
    data, more = await self._make_request("GET", url, url_vars, b"", accept)
  File "/home/metal/Documents/projects/-git/venv/lib/python3.6/site-packages/gidgethub/abc.py", line 66, in _make_request
    data, self.rate_limit, more = sansio.decipher_response(*response)
  File "/home/metal/Documents/projects/-git/venv/lib/python3.6/site-packages/gidgethub/sansio.py", line 284, in decipher_response
    rate_limit = RateLimit.from_http(headers)
  File "/home/metal/Documents/projects/-git/venv/lib/python3.6/site-packages/gidgethub/sansio.py", line 226, in from_http
    limit = int(headers["x-ratelimit-limit"])
  File "multidict/_multidict.pyx", line 140, in multidict._multidict._Base.__getitem__
  File "multidict/_multidict.pyx", line 135, in multidict._multidict._Base._getone
KeyError: "Key not found: 'x-ratelimit-limit'"
Here's the function itself:
async def get_all_reviews(pulls, api, programmers):
    reviewed_pulls = []
    for pull in pulls:
        pr_repo = pull['repo']
        pr_number = str(pull['number'])
        async for review in api.getiter(f'/repos/{org}/{pr_repo}/pulls/{pr_number}/reviews'):
            if review['user']['login'] not in programmers \
                    and pull not in reviewed_pulls:
                reviewed_pulls.append(pull)
    return reviewed_pulls
and I'm calling it like this:
reviewed = await get_all_reviews(softserve_created, api, ss_programmers)
Upvotes: 2
Views: 472
Reputation: 39626
The idea you described worked fine for me:
import asyncio

import aiohttp
import gidgethub
from gidgethub.aiohttp import GitHubAPI

# TODO
# paste your token to have rate limits
# https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/
TOKEN = '...'


async def get_all_pulls_for_repo(gh, org, repo):
    pulls = []
    async for pull in gh.getiter(f'/repos/{org}/{repo}/pulls?state=all'):
        pulls.append(pull)
        await gh.sleep(0.1)  # avoid RateLimitExceeded, you should count it somehow
    return pulls


async def main():
    org = 'brettcannon'
    repos = ['gidgethub', 'caniusepython3', 'importlib_resources']

    async with aiohttp.ClientSession() as session:
        gh = GitHubAPI(session, 'requester', oauth_token=TOKEN)
        tasks = [
            asyncio.ensure_future(get_all_pulls_for_repo(gh, org, repo))
            for repo
            in repos
        ]
        results = await asyncio.gather(*tasks)
        for res in results:
            for pull in res:
                print(pull['url'])


loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(main())
finally:
    loop.run_until_complete(loop.shutdown_asyncgens())
    loop.close()
Create a token for the requests, paste it in, and you'll see the list of PR URLs.
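As for the "count it somehow" part: gidgethub stores the rate limit it saw in the last response on the client as gh.rate_limit (a RateLimit with limit, remaining and reset_datetime, as you can see in your traceback). So instead of a fixed gh.sleep(0.1) you could wait for the quota to reset once it runs low. A rough sketch (the threshold of 10 is arbitrary):
import datetime

async def get_all_pulls_for_repo(gh, org, repo):
    pulls = []
    async for pull in gh.getiter(f'/repos/{org}/{repo}/pulls?state=all'):
        pulls.append(pull)
        # gh.rate_limit is refreshed from the X-RateLimit-* headers of each response
        if gh.rate_limit is not None and gh.rate_limit.remaining < 10:
            # sleep until the quota resets instead of using a fixed delay
            delay = (gh.rate_limit.reset_datetime
                     - datetime.datetime.now(datetime.timezone.utc)).total_seconds()
            await gh.sleep(max(delay, 0))
    return pulls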
Upvotes: 1