"""Mass-import articles from a JSON file into Wallabag.

The script reads a JSON file of article descriptions, posts each one through
the Wallabag API client with a bounded number of concurrent requests, and can
optionally delete the created entries again once the import has finished.
"""

import argparse
import asyncio
import configparser
import json
import logging
import os
from itertools import islice
from typing import Any, Dict, List, Optional

import aiohttp
from wallabag_api.wallabag import Wallabag

from helpers import load_configuration, connect_to_wallabag

logging.basicConfig(level=logging.DEBUG)

ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_CONFIG_PATH = os.path.join(ROOT_DIR, "config.ini")
DEFAULT_ARTICLES_PATH = os.path.join(ROOT_DIR, "articles.json")


def _check_max_requests(max_requests: int) -> None:
    """Reject concurrency limits that would leave every request blocked.

    A semaphore created with the value 0 can never be acquired, so every
    request would wait forever instead of failing.

    Raises:
        ValueError: If ``max_requests`` is lower than 1.
    """
    if max_requests < 1:
        raise ValueError(f"max_requests must be at least 1, got {max_requests}")


async def post_entry(
    w_api: Wallabag, item: Dict[str, Any], semaphore: asyncio.Semaphore
) -> Optional[int]:
    """Post a single article to Wallabag.

    Args:
        w_api: Wallabag API client used to create the entry.
        item: Article description. The keys ``url``, ``is_archived``,
            ``is_starred`` and ``tags`` (an iterable of strings, sent as a
            comma-separated list) are read.
        semaphore: Limits how many requests run at the same time.

    Returns:
        The ``id`` field of the API response, or ``None`` when the entry
        could not be created (the failure is logged, not raised).
    """
    async with semaphore:
        try:
            entry = await w_api.post_entries(
                url=item["url"],
                archive=item["is_archived"],
                starred=item["is_starred"],
                tags=",".join(item["tags"]),
            )
            logging.info(
                f"Entry url {item['url']} posted to wallabag with id {entry['id']}"
            )
            return entry["id"]
        except Exception as e:
            # Best effort: one bad article must not abort the whole import.
            # The URL is looked up defensively because a malformed item may be
            # the very reason we ended up here; indexing it again would raise
            # from inside the handler and cancel the surrounding gather().
            if isinstance(item, dict):
                url = item.get("url", "<missing url>")
            else:
                url = repr(item)
            logging.error(f"Creating {url} generated an exception : {e}")
            return None


async def delete_entry(
    w_api: Wallabag, item_id: int, semaphore: asyncio.Semaphore
) -> None:
    """Delete a single Wallabag entry.

    Args:
        w_api: Wallabag API client used to delete the entry.
        item_id: Identifier of the entry to delete.
        semaphore: Limits how many requests run at the same time.

    Failures are logged and swallowed so that one failed deletion does not
    stop the remaining ones.
    """
    async with semaphore:
        try:
            entry = await w_api.delete_entries(item_id)
            logging.info(f"Deleted wallabag entry with id {entry['id']}")
        except Exception as e:
            logging.error(f"Deleting id {item_id} generated an exception : {e}")


async def import_articles(
    w_api: Wallabag, path: str, max_entries=0, max_requests=20
) -> List[Optional[int]]:
    """Post the articles stored in a JSON file to Wallabag.

    Args:
        w_api: Wallabag API client used to create the entries.
        path: Path of the JSON file holding the article descriptions.
        max_entries: Maximum number of articles to import; 0 or a negative
            value imports all of them.
        max_requests: Maximum number of concurrent requests.

    Returns:
        One result per imported article, in the order of the file: the id of
        the created entry, or ``None`` when its creation failed.

    Raises:
        ValueError: If ``max_requests`` is lower than 1.
    """
    _check_max_requests(max_requests)

    # Parse the file up front so it is closed before any request is sent.
    with open(path, encoding="utf-8") as f:
        items = json.load(f)

    limit = max_entries if max_entries > 0 else None
    sem = asyncio.Semaphore(max_requests)

    # gather() schedules the coroutines concurrently and returns their results
    # in the order they were passed in.
    return await asyncio.gather(
        *(post_entry(w_api, item, sem) for item in islice(items, limit))
    )


async def delete_all_entries(
    ids: List[Optional[int]], w_api: Wallabag, max_requests=20
) -> None:
    """Delete every entry whose id is listed in ``ids``.

    Args:
        ids: Entry ids to delete. ``None`` values (failed imports) are skipped.
        w_api: Wallabag API client used to delete the entries.
        max_requests: Maximum number of concurrent requests.

    Raises:
        ValueError: If ``max_requests`` is lower than 1.
    """
    _check_max_requests(max_requests)

    sem = asyncio.Semaphore(max_requests)
    await asyncio.gather(
        *(
            delete_entry(w_api, entry_id, sem)
            for entry_id in ids
            if entry_id is not None
        )
    )


async def async_main(
    config_path: str = DEFAULT_CONFIG_PATH,
    articles_path: str = DEFAULT_ARTICLES_PATH,
    max_entries: int = 0,
    max_requests: int = 20,
    delete_after_import: bool = False,
) -> None:
    """Connect to Wallabag, import the articles and optionally delete them.

    Args:
        config_path: Path of the configuration file.
        articles_path: Path of the JSON file holding the articles.
        max_entries: Maximum number of articles to import (0 = all).
        max_requests: Maximum number of concurrent requests.
        delete_after_import: When true, delete the created entries once the
            import is done.

    Raises:
        ValueError: If ``max_requests`` is lower than 1.
    """
    # Fail before opening a session or contacting the server.
    _check_max_requests(max_requests)

    async with aiohttp.ClientSession() as session:
        configuration = dict(load_configuration(config_path))
        w_api = await connect_to_wallabag(configuration, session)
        entries_id = await import_articles(
            w_api, articles_path, max_entries, max_requests
        )
        if delete_after_import:
            await delete_all_entries(entries_id, w_api, max_requests)


def main() -> None:
    """Parse the command-line arguments and run the import."""
    parser = argparse.ArgumentParser(description="Mass import articles to Wallabag")
    parser.add_argument(
        "--config", default=DEFAULT_CONFIG_PATH, help="Path to config file"
    )
    parser.add_argument(
        "--articles", default=DEFAULT_ARTICLES_PATH, help="Path to articles JSON file"
    )
    parser.add_argument(
        "--max-entries",
        type=int,
        default=0,
        help="Maximum number of entries to import (0 = all)",
    )
    parser.add_argument(
        "--max-requests", type=int, default=20, help="Maximum concurrent requests"
    )
    parser.add_argument(
        "--delete-after-import", action="store_true", help="Delete entries after import"
    )
    args = parser.parse_args()

    asyncio.run(
        async_main(
            args.config,
            args.articles,
            args.max_entries,
            args.max_requests,
            args.delete_after_import,
        )
    )


if __name__ == "__main__":
    main()