feat: uses a semaphore to limit concurrent requests

This commit is contained in:
Thibaud Gasser 2020-03-13 16:37:31 +01:00
parent 4f06791cad
commit 3f1ed1403e

View File

@ -35,41 +35,58 @@ async def connect_to_wallabag(
) )
async def post_entry(w_api: Wallabag, item: Dict[str, str]): async def post_entry(
entry = await w_api.post_entries( w_api: Wallabag, item: Dict[str, str], semaphore: asyncio.Semaphore
url=item["url"], ):
archive=item["is_archived"], async with semaphore:
starred=item["is_starred"], try:
original_url=item["origin_url"], entry = await w_api.post_entries(
tags=",".join(item["tags"]), url=item["url"],
) archive=item["is_archived"],
logging.info(f"Entry url {item['url']} posted to wallabag with id {entry['id']}") starred=item["is_starred"],
return entry["id"] original_url=item["origin_url"],
tags=",".join(item["tags"]),
)
logging.info(
f"Entry url {item['url']} posted to wallabag with id {entry['id']}"
)
return entry["id"]
except Exception as e:
logging.error(f"Creating {item['url']} generated an exception : {e}")
async def delete_entry(w_api: Wallabag, item_id: int): async def delete_entry(w_api: Wallabag, item_id: int, semaphore: asyncio.Semaphore):
entry = await w_api.delete_entries(item_id) async with semaphore:
logging.info(f"Deleted wallabag entry with id {entry['id']}") try:
entry = await w_api.delete_entries(item_id)
logging.info(f"Deleted wallabag entry with id {entry['id']}")
except Exception as e:
logging.error(f"Deleting id {item_id} generated an exception : {e}")
async def import_articles(w_api: Wallabag, path: str, limit=0): async def import_articles(w_api: Wallabag, path: str, max_entries=0, max_requests=20):
entries_id = [] sem = asyncio.Semaphore(max_requests)
tasks = []
entries_id = set()
tasks = set()
with open(path) as f: with open(path) as f:
for index, item in enumerate(json.load(f)): for index, item in enumerate(json.load(f)):
if limit > 0 and index == limit: if max_entries > 0 and index == max_entries:
break break
task = asyncio.ensure_future(post_entry(w_api, item)) task = asyncio.ensure_future(post_entry(w_api, item, sem))
tasks.append(task) tasks.add(task)
entries_id = await asyncio.gather(*tasks) entries_id = await asyncio.gather(*tasks)
return entries_id return entries_id
async def delete_all_entries(ids: List[int], w_api: Wallabag): async def delete_all_entries(ids: List[int], w_api: Wallabag, max_requests=20):
tasks = [] sem = asyncio.Semaphore(max_requests)
for id in ids: tasks = (
tasks.append(delete_entry(w_api, id)) asyncio.ensure_future(delete_entry(w_api, id, sem))
for id in ids
if id is not None
)
await asyncio.gather(*tasks) await asyncio.gather(*tasks)
@ -78,9 +95,9 @@ async def main():
configuration = dict(load_configuration(CONFIG_PATH)) configuration = dict(load_configuration(CONFIG_PATH))
w_api = await connect_to_wallabag(configuration, session) w_api = await connect_to_wallabag(configuration, session)
entries_id = await import_articles(w_api, ARTICLES_PATH, limit=4) entries_id = await import_articles(w_api, ARTICLES_PATH, max_entries=20, max_requests=20)
await delete_all_entries(entries_id, w_api) await delete_all_entries(entries_id, w_api)
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(main()) asyncio.run(main(), debug=True)