Files
wallabag-tools/wallabag_mass_import.py
2026-03-02 14:09:26 +01:00

127 lines
3.8 KiB
Python

import asyncio
import argparse
import configparser
import logging
import json
import os
from typing import Any, Dict, List
from helpers import load_configuration, connect_to_wallabag
import aiohttp
from wallabag_api.wallabag import Wallabag
# Configure the root logger to emit everything from DEBUG upward.
logging.basicConfig(level=logging.DEBUG)

# Directory that contains this script; the default input files live next to it.
ROOT_DIR = os.path.split(os.path.abspath(__file__))[0]
DEFAULT_CONFIG_PATH = os.path.join(ROOT_DIR, "config.ini")
DEFAULT_ARTICLES_PATH = os.path.join(ROOT_DIR, "articles.json")
async def post_entry(
    w_api: Wallabag, item: Dict[str, Any], semaphore: asyncio.Semaphore
):
    """Create one Wallabag entry from an article description.

    Args:
        w_api: Client whose ``post_entries`` coroutine performs the request.
        item: Article description. The keys ``url``, ``is_archived``,
            ``is_starred`` and ``tags`` are read; ``tags`` is joined with
            commas before being sent.
        semaphore: Held for the duration of the request to cap concurrency.

    Returns:
        The ``id`` field of the response on success, ``None`` when anything
        went wrong. Failures are logged at error level and never propagate,
        so one bad article cannot abort a bulk import.
    """
    # Resolve the URL for log messages without indexing: the error path below
    # must not raise itself when the failure *is* a missing "url" key or an
    # item that is not a dict at all.
    url = item.get("url", "<missing url>") if isinstance(item, dict) else "<missing url>"
    async with semaphore:
        try:
            entry = await w_api.post_entries(
                url=item["url"],
                archive=item["is_archived"],
                starred=item["is_starred"],
                tags=",".join(item["tags"]),
            )
            entry_id = entry["id"]
            logging.info(f"Entry url {url} posted to wallabag with id {entry_id}")
            return entry_id
        except Exception as e:
            # Deliberately broad: this is a best-effort bulk operation.
            logging.error(f"Creating {url} generated an exception : {e}")
            return None
async def delete_entry(w_api: Wallabag, item_id: int, semaphore: asyncio.Semaphore):
    """Delete a single Wallabag entry and log the outcome.

    The request is issued through ``w_api.delete_entries`` while ``semaphore``
    is held. Any exception raised by the request, or while reading ``id`` from
    the response, is logged at error level and swallowed. Nothing is returned.
    """
    async with semaphore:
        try:
            deleted = await w_api.delete_entries(item_id)
            deleted_id = deleted["id"]
            logging.info(f"Deleted wallabag entry with id {deleted_id}")
        except Exception as exc:
            logging.error(f"Deleting id {item_id} generated an exception : {exc}")
async def import_articles(w_api: Wallabag, path: str, max_entries=0, max_requests=20):
    """Post the articles listed in a JSON file to Wallabag concurrently.

    Args:
        w_api: Client passed through to ``post_entry`` for every article.
        path: Path of a JSON file whose top-level value is iterated; each
            element is handed unchanged to ``post_entry``.
        max_entries: Import at most this many articles. ``0`` or any
            non-positive value imports all of them.
        max_requests: Maximum number of posts in flight at the same time.

    Returns:
        A list with one element per scheduled article, in file order, holding
        whatever ``post_entry`` returned for it (an entry id, or ``None`` for
        a failed post).

    Raises:
        ValueError: If ``max_requests`` is smaller than 1.
    """
    if max_requests < 1:
        # A semaphore without permits would leave every post waiting forever.
        raise ValueError("max_requests must be at least 1")
    sem = asyncio.Semaphore(max_requests)

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(path, encoding="utf-8") as f:
        articles = json.load(f)

    # A list (not a set) keeps the gathered results aligned with file order.
    tasks = []
    for index, item in enumerate(articles):
        if max_entries > 0 and index == max_entries:
            break
        tasks.append(asyncio.ensure_future(post_entry(w_api, item, sem)))
    return await asyncio.gather(*tasks)
async def delete_all_entries(ids: List[int], w_api: Wallabag, max_requests=20):
    """Delete every entry listed in ``ids`` concurrently.

    ``None`` elements are skipped. One ``delete_entry`` task is scheduled per
    remaining id, all sharing a semaphore sized by ``max_requests``, and the
    coroutine returns once every task has finished.
    """
    limiter = asyncio.Semaphore(max_requests)
    pending = []
    for entry_id in ids:
        if entry_id is None:
            continue
        pending.append(asyncio.ensure_future(delete_entry(w_api, entry_id, limiter)))
    await asyncio.gather(*pending)
async def async_main(
    config_path: str = DEFAULT_CONFIG_PATH,
    articles_path: str = DEFAULT_ARTICLES_PATH,
    max_entries: int = 0,
    max_requests: int = 20,
    delete_after_import: bool = False,
):
    """Run one import, optionally followed by deletion of what was imported.

    Inside a single ``aiohttp.ClientSession`` this loads the configuration
    from ``config_path``, obtains a Wallabag client from
    ``connect_to_wallabag``, and imports the articles found at
    ``articles_path``. When ``delete_after_import`` is true, the ids returned
    by the import are then passed to ``delete_all_entries``.
    """
    async with aiohttp.ClientSession() as http_session:
        settings = dict(load_configuration(config_path))
        client = await connect_to_wallabag(settings, http_session)
        imported_ids = await import_articles(
            client,
            articles_path,
            max_entries=max_entries,
            max_requests=max_requests,
        )
        if not delete_after_import:
            return
        await delete_all_entries(imported_ids, client, max_requests=max_requests)
def main(argv=None):
    """Parse the command line and run the import.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Exits with argparse's usage error when ``--max-requests`` is below 1.
    """
    parser = argparse.ArgumentParser(description="Mass import articles to Wallabag")
    parser.add_argument(
        "--config", default=DEFAULT_CONFIG_PATH, help="Path to config file"
    )
    parser.add_argument(
        "--articles", default=DEFAULT_ARTICLES_PATH, help="Path to articles JSON file"
    )
    parser.add_argument(
        "--max-entries",
        type=int,
        default=0,
        help="Maximum number of entries to import (0 = all)",
    )
    parser.add_argument(
        "--max-requests", type=int, default=20, help="Maximum concurrent requests"
    )
    parser.add_argument(
        "--delete-after-import", action="store_true", help="Delete entries after import"
    )
    args = parser.parse_args(argv)

    if args.max_requests < 1:
        # The value sizes an asyncio.Semaphore: 0 would leave every request
        # waiting forever and a negative value raises deep inside the run.
        parser.error("--max-requests must be at least 1")

    asyncio.run(
        async_main(
            args.config,
            args.articles,
            args.max_entries,
            args.max_requests,
            args.delete_after_import,
        )
    )


if __name__ == "__main__":
    main()