Pyzotero: a Python client for the Zotero API pyzotero.readthedocs.io
zotero

Refactor monolithic zotero.py into modular package structure

Split the 2050-line zotero.py into logical modules while maintaining
backwards compatibility:

- _utils.py: Utility functions (build_url, merge_params, token, chunks)
- _decorators.py: API decorators (tcache, backoff_check, retrieve, ss_wrap)
- _search.py: SavedSearch class for saved search functionality
- _upload.py: Zupload class for file attachment handling
- _client.py: Main Zotero class with API methods
- errors.py: Exception classes and error_handler function

Backwards compatibility maintained via:
- zotero.py: Re-exports all symbols from new modules
- zotero_errors.py: Re-exports from errors.py
- __init__.py: Public API exports for direct imports

All 75 tests pass.


Signed-off-by: Stephan Hügel <shugel@tcd.ie>

+2557 -2221
+2 -2
README.md
··· 15 15 Then: 16 16 17 17 ``` python 18 - from pyzotero import zotero 19 - zot = zotero.Zotero(library_id, library_type, api_key) # local=True for read access to local Zotero 18 + from pyzotero import Zotero 19 + zot = Zotero(library_id, library_type, api_key) # local=True for read access to local Zotero 20 20 items = zot.top(limit=5) 21 21 # we've retrieved the latest five top-level items in our library 22 22 # we can print each item's item type and ID
+1 -2
doc/conf.py
··· 16 16 17 17 sys.path.insert(1, "..") 18 18 import pyzotero 19 - from pyzotero import zotero as zot 20 19 21 20 # Tell Jinja2 templates the build is running on Read the Docs 22 21 if os.environ.get("READTHEDOCS", "") == "True": ··· 24 23 html_context = {} 25 24 html_context["READTHEDOCS"] = True 26 25 27 - author = zot.__author__ 26 + author = "Stephan Hügel" 28 27 current_year = datetime.datetime.now(tz=datetime.timezone.utc).date().year 29 28 30 29 html_context = {
+8 -8
doc/index.rst
··· 30 30 .. code-block:: python 31 31 :emphasize-lines: 1,2,3 32 32 33 - from pyzotero import zotero 34 - zot = zotero.Zotero(library_id, library_type, api_key) 33 + from pyzotero import Zotero 34 + zot = Zotero(library_id, library_type, api_key) 35 35 items = zot.top(limit=5) 36 36 # we've retrieved the latest five top-level items in our library 37 37 # we can print each item's item type and ID ··· 205 205 .. code-block:: python 206 206 :emphasize-lines: 4 207 207 208 - from pyzotero import zotero 209 - zot = zotero.Zotero('123', 'user', 'ABC1234XYZ') 208 + from pyzotero import Zotero 209 + zot = Zotero('123', 'user', 'ABC1234XYZ') 210 210 # we now have a Zotero object, zot, and access to all its methods 211 211 first_ten = zot.items(limit=10) 212 212 # a list containing dicts of the ten most recently modified library items ··· 731 731 732 732 .. code-block:: python 733 733 734 - from pyzotero import zotero 735 - zot = zotero.Zotero(library_id, library_type, api_key) 734 + from pyzotero import Zotero 735 + zot = Zotero(library_id, library_type, api_key) 736 736 # only retrieve a single item 737 737 # this will retrieve the most recently added/modified top-level item 738 738 first_item = zot.top(limit=1) ··· 747 747 748 748 .. code-block:: python 749 749 750 - from pyzotero import zotero 751 - zot = zotero.Zotero(library_id, library_type, api_key) 750 + from pyzotero import Zotero 751 + zot = Zotero(library_id, library_type, api_key) 752 752 # retrieve all top-level items 753 753 toplevel = zot.everything(zot.top()) 754 754
+5 -2
example/local_base_use.py
··· 1 - from pyzotero import zotero 2 - zot = zotero.Zotero(library_id='000000', library_type = 'user', local=True) # local=True for read access to local Zotero 1 + from pyzotero import Zotero 2 + 3 + zot = Zotero( 4 + library_id="000000", library_type="user", local=True 5 + ) # local=True for read access to local Zotero 3 6 items = zot.top(limit=5) 4 7 # we've retrieved the latest five top-level items in our library 5 8 # we can print each item's item type and ID
+10 -11
example/local_copy_pdf.py
··· 1 - from pyzotero import zotero 2 1 from pathlib import Path 2 + 3 + from pyzotero import Zotero 4 + 3 5 4 6 def copy_specific_pdf(item_id, output_dir, new_name=None): 5 7 """ ··· 12 14 13 15 """ 14 16 # Initialize Zotero client with local=True 15 - zot = zotero.Zotero(library_id='000000', library_type='user', local=True) 17 + zot = Zotero(library_id="000000", library_type="user", local=True) 16 18 17 19 # Create output directory if it doesn't exist 18 20 output_path = Path(output_dir) ··· 25 27 try: 26 28 # Get original filename or use default 27 29 default_filename = f"{item_id}.pdf" 28 - original_filename = item['data'].get('filename', default_filename) 30 + original_filename = item["data"].get("filename", default_filename) 29 31 30 32 # Use new_name if provided, otherwise use original filename 31 33 filename = new_name if new_name else original_filename 32 34 33 35 # Add .pdf extension if not present 34 - if not filename.lower().endswith('.pdf'): 35 - filename += '.pdf' 36 + if not filename.lower().endswith(".pdf"): 37 + filename += ".pdf" 36 38 37 39 # Use dump() with explicit filename 38 - full_path = zot.dump( 39 - item_id, 40 - filename=filename, 41 - path=str(output_path) 42 - ) 40 + full_path = zot.dump(item_id, filename=filename, path=str(output_path)) 43 41 44 42 print(f"\nSuccessfully copied file to: {full_path}") 45 43 print(f"Title: {item['data'].get('title', 'No title')}") ··· 51 49 except Exception as e: 52 50 print(f"Error accessing Zotero item: {e!s}") 53 51 52 + 54 53 if __name__ == "__main__": 55 54 # Example usage with specific item ID 56 - item_id = '8M9FYC2W' 55 + item_id = "8M9FYC2W" 57 56 data_dir = "./example/data/pdfs" 58 57 59 58 # Example 1: Copy with new name
+15 -10
example/local_get_item_detail.py
··· 1 - from pyzotero import zotero 2 1 from pprint import pprint 2 + 3 + from pyzotero import Zotero 4 + 3 5 4 6 def get_item_detail(item_id): 5 7 """ ··· 8 10 item_id (str): Zotero item ID 9 11 """ 10 12 # Initialize Zotero client with local=True 11 - zot = zotero.Zotero(library_id='000000', library_type='user', local=True) 13 + zot = Zotero(library_id="000000", library_type="user", local=True) 12 14 13 15 try: 14 16 # Get the item ··· 22 24 print(f"Title: {item['data'].get('title', 'No title')}") 23 25 24 26 # If it's an attachment, show parent item 25 - if item['data'].get('parentItem'): 27 + if item["data"].get("parentItem"): 26 28 try: 27 - parent = zot.item(item['data']['parentItem']) 29 + parent = zot.item(item["data"]["parentItem"]) 28 30 print("\nParent Item:") 29 31 print(f"Parent ID: {parent['key']}") 30 32 print(f"Parent Title: {parent['data'].get('title', 'No title')}") ··· 36 38 if children: 37 39 print("\nChild Items:") 38 40 for child in children: 39 - print(f"- {child['data'].get('title', 'No title')} " 40 - f"(ID: {child['key']}, " 41 - f"Type: {child['data'].get('itemType', 'Unknown')})") 41 + print( 42 + f"- {child['data'].get('title', 'No title')} " 43 + f"(ID: {child['key']}, " 44 + f"Type: {child['data'].get('itemType', 'Unknown')})" 45 + ) 42 46 43 47 # Show collections this item belongs to 44 - collections = item['data'].get('collections', []) 48 + collections = item["data"].get("collections", []) 45 49 if collections: 46 50 print("\nCollections:") 47 51 try: ··· 54 58 # Show all metadata 55 59 print("\nFull Metadata:") 56 60 print("-" * 50) 57 - pprint(item['data']) 61 + pprint(item["data"]) 58 62 59 63 except Exception as e: 60 64 print(f"Error getting item details: {e!s}") ··· 62 66 else: 63 67 return item 64 68 69 + 65 70 if __name__ == "__main__": 66 71 # Example usage with a specific item ID 67 - item_id = 'K9V7JFXY' # Replace with your item ID 72 + item_id = "K9V7JFXY" # Replace with your item ID 68 73 item_detail = 
get_item_detail(item_id)
+8 -6
example/local_search_title.py
··· 1 - from pyzotero import zotero 1 + from pyzotero import Zotero 2 + 2 3 3 4 def search_by_title(title_query): 4 5 """ ··· 9 10 list: List of matching items 10 11 """ 11 12 # Initialize Zotero client with local=True 12 - zot = zotero.Zotero(library_id='000000', library_type='user', local=True) 13 + zot = Zotero(library_id="000000", library_type="user", local=True) 13 14 14 15 try: 15 16 # Search for items where title contains the query string ··· 19 20 print(f"\nFound {len(results)} items matching '{title_query}':") 20 21 for item in results: 21 22 # Get the item data 22 - title = item['data'].get('title', 'No title') 23 - item_type = item['data'].get('itemType', 'Unknown type') 24 - date = item['data'].get('date', 'No date') 25 - item_id = item['data'].get('key', 'No ID') 23 + title = item["data"].get("title", "No title") 24 + item_type = item["data"].get("itemType", "Unknown type") 25 + date = item["data"].get("date", "No date") 26 + item_id = item["data"].get("key", "No ID") 26 27 27 28 print("\n##########################") 28 29 print(f"ID: {item_id}") ··· 36 37 return [] 37 38 else: 38 39 return results 40 + 39 41 40 42 if __name__ == "__main__": 41 43 # Example usage
+60
src/pyzotero/__init__.py
··· 1 + """Pyzotero - Python wrapper for the Zotero API.""" 2 + 1 3 import importlib.metadata 2 4 3 5 try: ··· 5 7 __version__ = importlib.metadata.version(__package__ or __name__) 6 8 except importlib.metadata.PackageNotFoundError: 7 9 __version__ = "0.0.0" 10 + 11 + # Public API exports 12 + from pyzotero._client import Zotero 13 + from pyzotero._search import SavedSearch 14 + from pyzotero._upload import Zupload 15 + from pyzotero._utils import chunks 16 + from pyzotero.errors import ( 17 + CallDoesNotExistError, 18 + ConflictError, 19 + CouldNotReachURLError, 20 + FileDoesNotExistError, 21 + HTTPError, 22 + InvalidItemFieldsError, 23 + MissingCredentialsError, 24 + ParamNotPassedError, 25 + PreConditionFailedError, 26 + PreConditionRequiredError, 27 + PyZoteroError, 28 + RequestEntityTooLargeError, 29 + ResourceNotFoundError, 30 + TooManyItemsError, 31 + TooManyRequestsError, 32 + TooManyRetriesError, 33 + UnsupportedParamsError, 34 + UploadError, 35 + UserNotAuthorisedError, 36 + ) 37 + 38 + __all__ = [ 39 + # Exceptions 40 + "CallDoesNotExistError", 41 + "ConflictError", 42 + "CouldNotReachURLError", 43 + "FileDoesNotExistError", 44 + "HTTPError", 45 + "InvalidItemFieldsError", 46 + "MissingCredentialsError", 47 + "ParamNotPassedError", 48 + "PreConditionFailedError", 49 + "PreConditionRequiredError", 50 + "PyZoteroError", 51 + "RequestEntityTooLargeError", 52 + "ResourceNotFoundError", 53 + "SavedSearch", 54 + "TooManyItemsError", 55 + "TooManyRequestsError", 56 + "TooManyRetriesError", 57 + "UnsupportedParamsError", 58 + "UploadError", 59 + "UserNotAuthorisedError", 60 + # Main classes 61 + "Zotero", 62 + "Zupload", 63 + # Version 64 + "__version__", 65 + # Utilities 66 + "chunks", 67 + ]
+1440
src/pyzotero/_client.py
··· 1 + """Zotero API client for Pyzotero. 2 + 3 + This module contains the main Zotero class which provides methods for 4 + interacting with the Zotero API. 5 + """ 6 + 7 + from __future__ import annotations 8 + 9 + import copy 10 + import json 11 + import re 12 + import threading 13 + import time 14 + from pathlib import Path, PurePosixPath 15 + from urllib.parse import ( 16 + parse_qs, 17 + parse_qsl, 18 + quote, 19 + unquote, 20 + urlencode, 21 + urlparse, 22 + urlunparse, 23 + ) 24 + 25 + import httpx 26 + import whenever 27 + from httpx import Request 28 + 29 + import pyzotero as pz 30 + 31 + from . import errors as ze 32 + from ._decorators import backoff_check, cleanwrap, retrieve, ss_wrap, tcache 33 + from ._upload import Zupload 34 + from ._utils import ( 35 + DEFAULT_ITEM_LIMIT, 36 + DEFAULT_NUM_ITEMS, 37 + DEFAULT_TIMEOUT, 38 + ONE_HOUR, 39 + build_url, 40 + chunks, 41 + merge_params, 42 + token, 43 + ) 44 + from .errors import error_handler 45 + from .filetransport import Client as File_Client 46 + 47 + __author__ = "Stephan Hügel" 48 + __api_version__ = "3" 49 + 50 + 51 + class Zotero: 52 + """Zotero API methods. 53 + 54 + A full list of methods can be found here: 55 + http://www.zotero.org/support/dev/server_api 56 + """ 57 + 58 + def __init__( 59 + self, 60 + library_id=None, 61 + library_type=None, 62 + api_key=None, 63 + preserve_json_order=False, 64 + locale="en-US", 65 + local=False, 66 + ): 67 + self.client = None 68 + """Store Zotero credentials""" 69 + if not local: 70 + self.endpoint = "https://api.zotero.org" 71 + self.local = False 72 + else: 73 + self.endpoint = "http://localhost:23119/api" 74 + self.local = True 75 + if library_id is not None and library_type: 76 + self.library_id = library_id 77 + # library_type determines whether query begins w. 
/users or /groups 78 + self.library_type = library_type + "s" 79 + else: 80 + err = "Please provide both the library ID and the library type" 81 + raise ze.MissingCredentialsError(err) 82 + # api_key is not required for public individual or group libraries 83 + self.api_key = api_key 84 + if preserve_json_order: 85 + import warnings # noqa: PLC0415 86 + 87 + warnings.warn( 88 + "preserve_json_order is deprecated and will be removed in a future version. " 89 + "Python 3.7+ dicts preserve insertion order automatically.", 90 + DeprecationWarning, 91 + stacklevel=2, 92 + ) 93 + self.locale = locale 94 + self.url_params = None 95 + self.tag_data = False 96 + self.request = None 97 + self.snapshot = False 98 + self.client = httpx.Client( 99 + headers=self.default_headers(), 100 + follow_redirects=True, 101 + ) 102 + # these aren't valid item fields, so never send them to the server 103 + self.temp_keys = {"key", "etag", "group_id", "updated"} 104 + # determine which processor to use for the parsed content 105 + self.fmt = re.compile(r"(?<=format=)\w+") 106 + self.content = re.compile(r"(?<=content=)\w+") 107 + # JSON by default 108 + self.formats = { 109 + "application/atom+xml": "atom", 110 + "application/x-bibtex": "bibtex", 111 + "application/json": "json", 112 + "text/html": "snapshot", 113 + "text/plain": "plain", 114 + "text/markdown": "plain", 115 + "application/pdf; charset=utf-8": "pdf", 116 + "application/pdf": "pdf", 117 + "application/msword": "doc", 118 + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx", 119 + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx", 120 + "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx", 121 + "application/zip": "zip", 122 + "application/epub+zip": "zip", 123 + "audio/mpeg": "mp3", 124 + "video/mp4": "mp4", 125 + "audio/x-wav": "wav", 126 + "video/x-msvideo": "avi", 127 + "application/octet-stream": "octet", 128 + "application/x-tex": 
"tex", 129 + "application/x-texinfo": "texinfo", 130 + "image/jpeg": "jpeg", 131 + "image/png": "png", 132 + "image/gif": "gif", 133 + "image/tiff": "tiff", 134 + "application/postscript": "postscript", 135 + "application/rtf": "rtf", 136 + } 137 + self.processors = { 138 + "bib": self._bib_processor, 139 + "citation": self._citation_processor, 140 + "bibtex": self._bib_processor, 141 + "bookmarks": self._bib_processor, 142 + "coins": self._bib_processor, 143 + "csljson": self._csljson_processor, 144 + "mods": self._bib_processor, 145 + "refer": self._bib_processor, 146 + "rdf_bibliontology": self._bib_processor, 147 + "rdf_dc": self._bib_processor, 148 + "rdf_zotero": self._bib_processor, 149 + "ris": self._bib_processor, 150 + "tei": self._bib_processor, 151 + "wikipedia": self._bib_processor, 152 + "json": self._json_processor, 153 + "html": self._bib_processor, 154 + } 155 + self.links = None 156 + self.self_link = {} 157 + self.templates = {} 158 + self.savedsearch = None 159 + # these are required for backoff handling 160 + self.backoff = False 161 + self.backoff_duration = 0.0 162 + 163 + def __del__(self): 164 + """Remove client before cleanup.""" 165 + # this isn't guaranteed to run, but that's OK 166 + if c := self.client: 167 + c.close() 168 + 169 + @property 170 + def __version__(self): 171 + """Return the version of the pyzotero library.""" 172 + return pz.__version__ 173 + 174 + def _check_for_component(self, url, component): 175 + """Check a url path query fragment for a specific query parameter.""" 176 + return bool(parse_qs(url).get(component)) 177 + 178 + def _striplocal(self, url): 179 + """Remove the leading '/api' substring from urls if running in local mode.""" 180 + if self.local: 181 + parsed = urlparse(url) 182 + purepath = PurePosixPath(unquote(parsed.path)) 183 + newpath = "/".join(purepath.parts[2:]) 184 + replaced = parsed._replace(path="/" + newpath) 185 + return urlunparse(replaced) 186 + return url 187 + 188 + def _set_backoff(self, 
duration): 189 + """Set a backoff. 190 + 191 + Spins up a timer in a background thread which resets the backoff logic 192 + when it expires, then sets the time at which the backoff will expire. 193 + The latter step is required so that other calls can check whether there's 194 + an active backoff, because the threading.Timer method has no way 195 + of returning a duration. 196 + """ 197 + duration = float(duration) 198 + self.backoff = True 199 + threading.Timer(duration, self._reset_backoff).start() 200 + self.backoff_duration = time.time() + duration 201 + 202 + def _reset_backoff(self): 203 + self.backoff = False 204 + self.backoff_duration = 0.0 205 + 206 + def _check_backoff(self): 207 + """Before an API call is made, check whether there's an active backoff. 208 + 209 + If there is, check whether there's any time left on the backoff. 210 + If there is, sleep for the remainder before returning. 211 + """ 212 + if self.backoff: 213 + remainder = self.backoff_duration - time.time() 214 + if remainder > 0.0: 215 + time.sleep(remainder) 216 + 217 + def default_headers(self): 218 + """Return headers that are always OK to include.""" 219 + _headers = { 220 + "User-Agent": f"Pyzotero/{pz.__version__}", 221 + "Zotero-API-Version": f"{__api_version__}", 222 + } 223 + if self.api_key: 224 + _headers["Authorization"] = f"Bearer {self.api_key}" 225 + return _headers 226 + 227 + def _cache(self, response, key): 228 + """Add a retrieved template to the cache for 304 checking. 229 + 230 + Accepts a dict and key name, adds the retrieval time, and adds both 231 + to self.templates as a new dict using the specified key. 
232 + """ 233 + # cache template and retrieval time for subsequent calls 234 + try: 235 + thetime = whenever.ZonedDateTime.now("Europe/London").py_datetime() 236 + except AttributeError: 237 + thetime = whenever.ZonedDateTime.now("Europe/London").py_datetime() 238 + self.templates[key] = {"tmplt": response.json(), "updated": thetime} 239 + return copy.deepcopy(response.json()) 240 + 241 + @cleanwrap 242 + def _cleanup(self, to_clean, allow=()): 243 + """Remove keys we added for internal use.""" 244 + # this item's been retrieved from the API, we only need the 'data' entry 245 + if to_clean.keys() == ["links", "library", "version", "meta", "key", "data"]: 246 + to_clean = to_clean["data"] 247 + return dict( 248 + [ 249 + [k, v] 250 + for k, v in list(to_clean.items()) 251 + if (k in allow or k not in self.temp_keys) 252 + ], 253 + ) 254 + 255 + def _retrieve_data(self, request: str | None = None, params=None): 256 + """Retrieve Zotero items via the API. 257 + 258 + Combine endpoint and request to access the specific resource. 259 + Returns a JSON document. 260 + """ 261 + if request is None: 262 + request = "" 263 + full_url = build_url(self.endpoint, request) 264 + # ensure that we wait if there's an active backoff 265 + self._check_backoff() 266 + # don't set locale if the url already contains it 267 + # we always add a locale if it's a "standalone" or first call 268 + needs_locale = not self.links or not self._check_for_component( 269 + self.links.get("next"), 270 + "locale", 271 + ) 272 + if needs_locale: 273 + if params: 274 + params["locale"] = self.locale 275 + else: 276 + params = {"locale": self.locale} 277 + # we now have to merge self.url_params (default params, and those supplied by the user) 278 + if not params: 279 + params = {} 280 + if not self.url_params: 281 + self.url_params = {} 282 + merged_params = {**self.url_params, **params} 283 + # our incoming url might be from the "links" dict, in which case it will contain url parameters. 
284 + # Unfortunately, httpx doesn't like to merge query parameters in the url string and passed params 285 + # so we strip the url params, combining them with our existing url_params 286 + final_url, final_params = merge_params(full_url, merged_params) 287 + # file URI errors are raised immediately so we have to try here 288 + try: 289 + self.request = self.client.get( 290 + url=final_url, 291 + params=final_params, 292 + headers=self.default_headers(), 293 + timeout=DEFAULT_TIMEOUT, 294 + ) 295 + self.request.encoding = "utf-8" 296 + # The API doesn't return this any more, so we have to cheat 297 + self.self_link = self.request.url 298 + except httpx.UnsupportedProtocol: 299 + # File URI handler logic 300 + fc = File_Client() 301 + response = fc.get( 302 + url=final_url, 303 + params=final_params, 304 + headers=self.default_headers(), 305 + timeout=DEFAULT_TIMEOUT, 306 + follow_redirects=True, 307 + ) 308 + self.request = response 309 + # since we'll be writing bytes, we need to set this to a type that will trigger the bytes processor 310 + self.request.headers["Content-Type"] = "text/plain" 311 + try: 312 + self.request.raise_for_status() 313 + except httpx.HTTPError as exc: 314 + error_handler(self, self.request, exc) 315 + backoff = self.request.headers.get("backoff") or self.request.headers.get( 316 + "retry-after", 317 + ) 318 + if backoff: 319 + self._set_backoff(backoff) 320 + return self.request 321 + 322 + def _extract_links(self): 323 + """Extract self, first, next, last links from a request response.""" 324 + extracted = {} 325 + try: 326 + for key, value in self.request.links.items(): 327 + parsed = urlparse(value["url"]) 328 + fragment = urlunparse(("", "", parsed.path, "", parsed.query, "")) 329 + extracted[key] = fragment 330 + # add a 'self' link 331 + parsed = urlparse(str(self.self_link)) 332 + # strip 'format' query parameter and rebuild query string 333 + query_params = [(k, v) for k, v in parse_qsl(parsed.query) if k != "format"] 334 + # 
rebuild url fragment with just path and query (consistent with other links) 335 + extracted["self"] = urlunparse( 336 + ("", "", parsed.path, "", urlencode(query_params), "") 337 + ) 338 + except KeyError: 339 + # No links present, because it's a single item 340 + return None 341 + else: 342 + return extracted 343 + 344 + def _updated(self, url, payload, template=None): 345 + """Call to see if a template request returns 304. 346 + 347 + Accepts: 348 + - a string to combine with the API endpoint 349 + - a dict of format values, in case they're required by 'url' 350 + - a template name to check for 351 + 352 + As per the API docs, a template less than 1 hour old is 353 + assumed to be fresh, and will immediately return False if found. 354 + """ 355 + # If the template is more than an hour old, try a 304 356 + if ( 357 + abs( 358 + whenever.ZonedDateTime.now("Europe/London").py_datetime() 359 + - self.templates[template]["updated"], 360 + ).seconds 361 + > ONE_HOUR 362 + ): 363 + query = build_url( 364 + self.endpoint, 365 + url.format(u=self.library_id, t=self.library_type, **payload), 366 + ) 367 + headers = { 368 + "If-Modified-Since": payload["updated"].strftime( 369 + "%a, %d %b %Y %H:%M:%S %Z", 370 + ), 371 + } 372 + # perform the request, and check whether the response returns 304 373 + self._check_backoff() 374 + req = self.client.get(query, headers=headers) 375 + try: 376 + req.raise_for_status() 377 + except httpx.HTTPError as exc: 378 + error_handler(self, req, exc) 379 + backoff = self.request.headers.get("backoff") or self.request.headers.get( 380 + "retry-after", 381 + ) 382 + if backoff: 383 + self._set_backoff(backoff) 384 + return req.status_code == httpx.codes.NOT_MODIFIED 385 + # Still plenty of life left in't 386 + return False 387 + 388 + def add_parameters(self, **params): 389 + """Add URL parameters. 390 + 391 + Also ensure that only valid format/content combinations are requested. 
392 + """ 393 + # Preserve constructor-level parameters (like locale) while allowing method-level overrides 394 + if self.url_params is None: 395 + self.url_params = {} 396 + 397 + # Store existing params to preserve things like locale 398 + preserved_params = self.url_params.copy() 399 + 400 + # we want JSON by default 401 + if not params.get("format"): 402 + params["format"] = "json" 403 + # non-standard content must be retrieved as Atom 404 + if params.get("content"): 405 + params["format"] = "atom" 406 + # TODO: rewrite format=atom, content=json request 407 + if "limit" not in params or params.get("limit") == 0: 408 + params["limit"] = DEFAULT_ITEM_LIMIT 409 + # Need ability to request arbitrary number of results for version 410 + # response 411 + # -1 value is hack that works with current version 412 + elif params["limit"] == -1 or params["limit"] is None: 413 + del params["limit"] 414 + # bib format can't have a limit 415 + if params.get("format") == "bib": 416 + params.pop("limit", None) 417 + 418 + # Merge preserved params with new params (new params override existing ones) 419 + self.url_params = {**preserved_params, **params} 420 + 421 + def _build_query(self, query_string, no_params=False): 422 + """Set request parameters. 423 + 424 + Will always add the user ID if it hasn't been specifically set by an API method. 
425 + """ 426 + try: 427 + query = quote(query_string.format(u=self.library_id, t=self.library_type)) 428 + except KeyError as err: 429 + errmsg = f"There's a request parameter missing: {err}" 430 + raise ze.ParamNotPassedError(errmsg) from None 431 + # Add the URL parameters and the user key, if necessary 432 + if no_params is False and not self.url_params: 433 + self.add_parameters() 434 + return query 435 + 436 + @retrieve 437 + def publications(self): 438 + """Return the contents of My Publications.""" 439 + if self.library_type != "users": 440 + msg = "This API call does not exist for group libraries" 441 + raise ze.CallDoesNotExistError(msg) 442 + query_string = "/{t}/{u}/publications/items" 443 + return self._build_query(query_string) 444 + 445 + # The following methods are Zotero Read API calls 446 + def num_items(self): 447 + """Return the total number of top-level items in the library.""" 448 + query = "/{t}/{u}/items/top" 449 + return self._totals(query) 450 + 451 + def count_items(self): 452 + """Return the count of all items in a group / library.""" 453 + query = "/{t}/{u}/items" 454 + return self._totals(query) 455 + 456 + def num_collectionitems(self, collection): 457 + """Return the total number of items in the specified collection.""" 458 + query = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/items" 459 + return self._totals(query) 460 + 461 + def _totals(self, query): 462 + """General method for returning total counts.""" 463 + self.add_parameters(limit=1) 464 + query = self._build_query(query) 465 + self._retrieve_data(query) 466 + self.url_params = None 467 + # extract the 'total items' figure 468 + return int(self.request.headers["Total-Results"]) 469 + 470 + @retrieve 471 + def key_info(self, **kwargs): 472 + """Retrieve info about the permissions associated with the key.""" 473 + query_string = f"/keys/{self.api_key}" 474 + return self._build_query(query_string) 475 + 476 + @retrieve 477 + def items(self, 
**kwargs): 478 + """Get user items.""" 479 + query_string = "/{t}/{u}/items" 480 + return self._build_query(query_string) 481 + 482 + @retrieve 483 + def settings(self, **kwargs): 484 + """Get synced user settings.""" 485 + query_string = "/{t}/{u}/settings" 486 + return self._build_query(query_string) 487 + 488 + @retrieve 489 + def fulltext_item(self, itemkey, **kwargs): 490 + """Get full-text content for an item.""" 491 + query_string = ( 492 + f"/{self.library_type}/{self.library_id}/items/{itemkey}/fulltext" 493 + ) 494 + return self._build_query(query_string) 495 + 496 + @backoff_check 497 + def set_fulltext(self, itemkey, payload): 498 + """Set full-text data for an item. 499 + 500 + <itemkey> should correspond to an existing attachment item. 501 + payload should be a dict containing three keys: 502 + 'content': the full-text content and either 503 + For text documents, 'indexedChars' and 'totalChars' OR 504 + For PDFs, 'indexedPages' and 'totalPages'. 505 + """ 506 + headers = {"Content-Type": "application/json"} 507 + return self.client.put( 508 + url=build_url( 509 + self.endpoint, 510 + f"/{self.library_type}/{self.library_id}/items/{itemkey}/fulltext", 511 + ), 512 + headers=headers, 513 + json=payload, 514 + ) 515 + 516 + def new_fulltext(self, since): 517 + """Retrieve list of full-text content items and versions newer than <since>.""" 518 + query_string = f"/{self.library_type}/{self.library_id}/fulltext" 519 + headers = {} 520 + params = {"since": since} 521 + self._check_backoff() 522 + resp = self.client.get( 523 + build_url(self.endpoint, query_string), 524 + params=params, 525 + headers=headers, 526 + ) 527 + try: 528 + resp.raise_for_status() 529 + except httpx.HTTPError as exc: 530 + error_handler(self, resp, exc) 531 + backoff = self.request.headers.get("backoff") or self.request.headers.get( 532 + "retry-after", 533 + ) 534 + if backoff: 535 + self._set_backoff(backoff) 536 + return resp.json() 537 + 538 + def item_versions(self, **kwargs): 
539 + """Return dict associating item keys to versions. 540 + 541 + Accepts a since= parameter in kwargs to limit the data to those updated since. 542 + """ 543 + if "limit" not in kwargs: 544 + kwargs["limit"] = None 545 + kwargs["format"] = "versions" 546 + return self.items(**kwargs) 547 + 548 + def collection_versions(self, **kwargs): 549 + """Return dict associating collection keys to versions. 550 + 551 + Accepts a since= parameter in kwargs to limit the data to those updated since. 552 + """ 553 + if "limit" not in kwargs: 554 + kwargs["limit"] = None 555 + kwargs["format"] = "versions" 556 + return self.collections(**kwargs) 557 + 558 + def last_modified_version(self, **kwargs): 559 + """Get the last modified user or group library version.""" 560 + # This MUST be a multiple-object request, limit param notwithstanding 561 + self.items(limit=1) 562 + lmv = self.request.headers.get("last-modified-version", 0) 563 + return int(lmv) 564 + 565 + @retrieve 566 + def top(self, **kwargs): 567 + """Get user top-level items.""" 568 + query_string = "/{t}/{u}/items/top" 569 + return self._build_query(query_string) 570 + 571 + @retrieve 572 + def trash(self, **kwargs): 573 + """Get all items in the trash.""" 574 + query_string = "/{t}/{u}/items/trash" 575 + return self._build_query(query_string) 576 + 577 + @retrieve 578 + def searches(self, **kwargs): 579 + """Get saved searches.""" 580 + query_string = "/{t}/{u}/searches" 581 + return self._build_query(query_string) 582 + 583 + @retrieve 584 + def deleted(self, **kwargs): 585 + """Get all deleted items (requires since= parameter).""" 586 + if "limit" not in kwargs: 587 + # Currently deleted API doesn't respect limit leaving it out by 588 + # default preserves compat 589 + kwargs["limit"] = None 590 + query_string = "/{t}/{u}/deleted" 591 + return self._build_query(query_string) 592 + 593 + @retrieve 594 + def item(self, item, **kwargs): 595 + """Get a specific item.""" 596 + query_string = 
f"/{self.library_type}/{self.library_id}/items/{item.upper()}" 597 + return self._build_query(query_string) 598 + 599 + @retrieve 600 + def file(self, item, **kwargs): 601 + """Get the file from a specific item.""" 602 + query_string = ( 603 + f"/{self.library_type}/{self.library_id}/items/{item.upper()}/file" 604 + ) 605 + return self._build_query(query_string, no_params=True) 606 + 607 + def dump(self, itemkey, filename=None, path=None): 608 + """Dump a file attachment to disk, with optional filename and path.""" 609 + if not filename: 610 + filename = self.item(itemkey)["data"]["filename"] 611 + pth = Path(path) / filename if path else Path(filename) 612 + file = self.file(itemkey) 613 + if self.snapshot: 614 + self.snapshot = False 615 + pth = pth.parent / (pth.name + ".zip") 616 + with pth.open("wb") as f: 617 + f.write(file) 618 + 619 + @retrieve 620 + def children(self, item, **kwargs): 621 + """Get a specific item's child items.""" 622 + query_string = ( 623 + f"/{self.library_type}/{self.library_id}/items/{item.upper()}/children" 624 + ) 625 + return self._build_query(query_string) 626 + 627 + @retrieve 628 + def collection_items(self, collection, **kwargs): 629 + """Get a specific collection's items.""" 630 + query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/items" 631 + return self._build_query(query_string) 632 + 633 + @retrieve 634 + def collection_items_top(self, collection, **kwargs): 635 + """Get a specific collection's top-level items.""" 636 + query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/items/top" 637 + return self._build_query(query_string) 638 + 639 + @retrieve 640 + def collection_tags(self, collection, **kwargs): 641 + """Get a specific collection's tags.""" 642 + query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/tags" 643 + return self._build_query(query_string) 644 + 645 + @retrieve 646 + def collection(self, collection, 
**kwargs): 647 + """Get user collection.""" 648 + query_string = ( 649 + f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}" 650 + ) 651 + return self._build_query(query_string) 652 + 653 + @retrieve 654 + def collections(self, **kwargs): 655 + """Get user collections.""" 656 + query_string = "/{t}/{u}/collections" 657 + return self._build_query(query_string) 658 + 659 + def all_collections(self, collid=None): 660 + """Retrieve all collections and subcollections. 661 + 662 + Works for top-level collections or for a specific collection. 663 + Works at all collection depths. 664 + """ 665 + all_collections = [] 666 + 667 + def subcoll(clct): 668 + """Recursively add collections to a flat master list.""" 669 + all_collections.append(clct) 670 + if clct["meta"].get("numCollections", 0) > 0: 671 + # add collection to master list & recur with all child collections 672 + [ 673 + subcoll(c) 674 + for c in self.everything(self.collections_sub(clct["data"]["key"])) 675 + ] 676 + 677 + # select all top-level collections or a specific collection and children 678 + if collid: 679 + toplevel = [self.collection(collid)] 680 + else: 681 + toplevel = self.everything(self.collections_top()) 682 + [subcoll(collection) for collection in toplevel] 683 + return all_collections 684 + 685 + @retrieve 686 + def collections_top(self, **kwargs): 687 + """Get top-level user collections.""" 688 + query_string = "/{t}/{u}/collections/top" 689 + return self._build_query(query_string) 690 + 691 + @retrieve 692 + def collections_sub(self, collection, **kwargs): 693 + """Get subcollections for a specific collection.""" 694 + query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/collections" 695 + return self._build_query(query_string) 696 + 697 + @retrieve 698 + def groups(self, **kwargs): 699 + """Get user groups.""" 700 + query_string = "/users/{u}/groups" 701 + return self._build_query(query_string) 702 + 703 + @retrieve 704 + def 
tags(self, **kwargs): 705 + """Get tags.""" 706 + query_string = "/{t}/{u}/tags" 707 + self.tag_data = True 708 + return self._build_query(query_string) 709 + 710 + @retrieve 711 + def item_tags(self, item, **kwargs): 712 + """Get tags for a specific item.""" 713 + query_string = ( 714 + f"/{self.library_type}/{self.library_id}/items/{item.upper()}/tags" 715 + ) 716 + self.tag_data = True 717 + return self._build_query(query_string) 718 + 719 + def all_top(self, **kwargs): 720 + """Retrieve all top-level items.""" 721 + return self.everything(self.top(**kwargs)) 722 + 723 + @retrieve 724 + def follow(self): 725 + """Return the result of the call to the URL in the 'Next' link.""" 726 + if n := self.links.get("next"): 727 + return self._striplocal(n) 728 + return None 729 + 730 + def iterfollow(self): 731 + """Return generator for self.follow().""" 732 + # use same criterion as self.follow() 733 + while True: 734 + if self.links.get("next"): 735 + yield self.follow() 736 + else: 737 + return 738 + 739 + def makeiter(self, func): 740 + """Return a generator of func's results.""" 741 + if self.links is None or "self" not in self.links: 742 + msg = "makeiter() requires a previous API call with pagination links" 743 + raise RuntimeError(msg) 744 + # reset the link. This results in an extra API call, yes 745 + self.links["next"] = self.links["self"] 746 + return self.iterfollow() 747 + 748 + def everything(self, query): 749 + """Retrieve all items in the library for a particular query. 750 + 751 + This method will override the 'limit' parameter if it's been set. 
752 + """ 753 + try: 754 + items = [] 755 + items.extend(query) 756 + while self.links.get("next"): 757 + items.extend(self.follow()) 758 + except TypeError: 759 + # we have a bibliography object ughh 760 + items = copy.deepcopy(query) 761 + while self.links.get("next"): 762 + items.entries.extend(self.follow().entries) 763 + return items 764 + 765 + def get_subset(self, subset): 766 + """Retrieve a subset of items. 767 + 768 + Accepts a single argument: a list of item IDs. 769 + """ 770 + if len(subset) > DEFAULT_NUM_ITEMS: 771 + err = f"You may only retrieve {DEFAULT_NUM_ITEMS} items per call" 772 + raise ze.TooManyItemsError(err) 773 + # remember any url parameters that have been set 774 + params = self.url_params 775 + retr = [] 776 + for itm in subset: 777 + retr.append(self.item(itm)) 778 + self.url_params = params 779 + # clean up URL params when we're finished 780 + self.url_params = None 781 + return retr 782 + 783 + # The following methods process data returned by Read API calls 784 + def _json_processor(self, retrieved): 785 + """Format and return data from API calls which return Items.""" 786 + # send entries to _tags_data if there's no JSON 787 + try: 788 + items = [json.loads(e["content"][0]["value"]) for e in retrieved.entries] 789 + except KeyError: 790 + return self._tags_data(retrieved) 791 + return items 792 + 793 + def _csljson_processor(self, retrieved): 794 + """Return a list of dicts which are dumped CSL JSON.""" 795 + items = [ 796 + json.loads(entry["content"][0]["value"]) for entry in retrieved.entries 797 + ] 798 + self.url_params = None 799 + return items 800 + 801 + def _bib_processor(self, retrieved): 802 + """Return a list of strings formatted as HTML bibliography entries.""" 803 + items = [bib["content"][0]["value"] for bib in retrieved.entries] 804 + self.url_params = None 805 + return items 806 + 807 + def _citation_processor(self, retrieved): 808 + """Return a list of strings formatted as HTML citation entries.""" 809 + items = 
[cit["content"][0]["value"] for cit in retrieved.entries] 810 + self.url_params = None 811 + return items 812 + 813 + def _tags_data(self, retrieved): 814 + """Format and return data from API calls which return Tags.""" 815 + self.url_params = None 816 + return [t["tag"] for t in retrieved] 817 + 818 + # The following methods are Write API calls 819 + def item_template(self, itemtype, linkmode=None): 820 + """Get a template for a new item.""" 821 + # if we have a template and it hasn't been updated since we stored it 822 + template_name = f"item_template_{itemtype}_{linkmode or ''}" 823 + params = {"itemType": itemtype} 824 + # Set linkMode parameter for API request if itemtype is attachment 825 + if itemtype == "attachment": 826 + params["linkMode"] = linkmode 827 + self.add_parameters(**params) 828 + query_string = "/items/new" 829 + if self.templates.get(template_name) and not self._updated( 830 + query_string, 831 + self.templates[template_name], 832 + template_name, 833 + ): 834 + return copy.deepcopy(self.templates[template_name]["tmplt"]) 835 + # otherwise perform a normal request and cache the response 836 + retrieved = self._retrieve_data(query_string) 837 + return self._cache(retrieved, template_name) 838 + 839 + def _attachment_template(self, attachment_type): 840 + """Return a new attachment template of the required type. 841 + 842 + Types: imported_file, imported_url, linked_file, linked_url 843 + """ 844 + return self.item_template("attachment", linkmode=attachment_type) 845 + 846 + def _attachment(self, payload, parentid=None): 847 + """Create attachments. 848 + 849 + Accepts a list of one or more attachment template dicts 850 + and an optional parent Item ID. If this is specified, 851 + attachments are created under this ID. 
852 + """ 853 + attachment = Zupload(self, payload, parentid) 854 + return attachment.upload() 855 + 856 + @ss_wrap 857 + def show_operators(self): 858 + """Show available saved search operators.""" 859 + return self.savedsearch.operators 860 + 861 + @ss_wrap 862 + def show_conditions(self): 863 + """Show available saved search conditions.""" 864 + return self.savedsearch.conditions_operators.keys() 865 + 866 + @ss_wrap 867 + def show_condition_operators(self, condition): 868 + """Show available operators for a given saved search condition.""" 869 + # dict keys of allowed operators for the current condition 870 + permitted_operators = self.savedsearch.conditions_operators.get(condition) 871 + # transform these into values 872 + return {self.savedsearch.operators.get(op) for op in permitted_operators} 873 + 874 + @ss_wrap 875 + def saved_search(self, name, conditions): 876 + """Create a saved search. 877 + 878 + conditions is a list of dicts containing search conditions and must 879 + contain the following str keys: condition, operator, value 880 + """ 881 + self.savedsearch._validate(conditions) 882 + payload = [{"name": name, "conditions": conditions}] 883 + headers = {"Zotero-Write-Token": token()} 884 + self._check_backoff() 885 + req = self.client.post( 886 + url=build_url( 887 + self.endpoint, 888 + f"/{self.library_type}/{self.library_id}/searches", 889 + ), 890 + headers=headers, 891 + json=payload, 892 + ) 893 + self.request = req 894 + try: 895 + req.raise_for_status() 896 + except httpx.HTTPError as exc: 897 + error_handler(self, req, exc) 898 + backoff = self.request.headers.get("backoff") or self.request.headers.get( 899 + "retry-after", 900 + ) 901 + if backoff: 902 + self._set_backoff(backoff) 903 + return req.json() 904 + 905 + @ss_wrap 906 + def delete_saved_search(self, keys): 907 + """Delete one or more saved searches. 908 + 909 + Pass a list of one or more unique search keys. 
910 + """ 911 + headers = {"Zotero-Write-Token": token()} 912 + self._check_backoff() 913 + req = self.client.delete( 914 + url=build_url( 915 + self.endpoint, 916 + f"/{self.library_type}/{self.library_id}/searches", 917 + ), 918 + headers=headers, 919 + params={"searchKey": ",".join(keys)}, 920 + ) 921 + self.request = req 922 + try: 923 + req.raise_for_status() 924 + except httpx.HTTPError as exc: 925 + error_handler(self, req, exc) 926 + backoff = self.request.headers.get("backoff") or self.request.headers.get( 927 + "retry-after", 928 + ) 929 + if backoff: 930 + self._set_backoff(backoff) 931 + return req.status_code 932 + 933 + def upload_attachments(self, attachments, parentid=None, basedir=None): 934 + """Upload files to the already created (but never uploaded) attachments.""" 935 + return Zupload(self, attachments, parentid, basedir=basedir).upload() 936 + 937 + def add_tags(self, item, *tags): 938 + """Add one or more tags to a retrieved item, then update it on the server. 939 + 940 + Accepts a dict, and one or more tags to add to it. 941 + Returns the updated item from the server. 942 + """ 943 + # Make sure there's a tags field, or add one 944 + if not item.get("data", {}).get("tags"): 945 + item["data"]["tags"] = [] 946 + for tag in tags: 947 + item["data"]["tags"].append({"tag": f"{tag}"}) 948 + # make sure everything's OK 949 + self.check_items([item]) 950 + return self.update_item(item) 951 + 952 + def check_items(self, items): 953 + """Check that items to be created contain no invalid dict keys. 954 + 955 + Accepts a single argument: a list of one or more dicts. 956 + The retrieved fields are cached and re-used until a 304 call fails. 
957 + """ 958 + params = {"locale": self.locale, "timeout": DEFAULT_TIMEOUT} 959 + query_string = "/itemFields" 960 + r = Request( 961 + "GET", 962 + build_url(self.endpoint, query_string), 963 + params=params, 964 + ) 965 + with httpx.Client() as client: 966 + response = client.send(r) 967 + # now split up the URL 968 + result = urlparse(str(response.url)) 969 + # construct cache key 970 + cachekey = result.path + "_" + result.query 971 + if self.templates.get(cachekey) and not self._updated( 972 + query_string, 973 + self.templates[cachekey], 974 + cachekey, 975 + ): 976 + template = {t["field"] for t in self.templates[cachekey]["tmplt"]} 977 + else: 978 + template = {t["field"] for t in self.item_fields()} 979 + # add fields we know to be OK 980 + template |= { 981 + "path", 982 + "tags", 983 + "notes", 984 + "itemType", 985 + "creators", 986 + "mimeType", 987 + "linkMode", 988 + "note", 989 + "charset", 990 + "dateAdded", 991 + "version", 992 + "collections", 993 + "dateModified", 994 + "relations", 995 + # attachment items 996 + "parentItem", 997 + "mtime", 998 + "contentType", 999 + "md5", 1000 + "filename", 1001 + "inPublications", 1002 + # annotation fields 1003 + "annotationText", 1004 + "annotationColor", 1005 + "annotationType", 1006 + "annotationPageLabel", 1007 + "annotationPosition", 1008 + "annotationSortIndex", 1009 + "annotationComment", 1010 + "annotationAuthorName", 1011 + } 1012 + template |= set(self.temp_keys) 1013 + processed_items = [] 1014 + for pos, item in enumerate(items): 1015 + if set(item) == {"links", "library", "version", "meta", "key", "data"}: 1016 + itm = item["data"] 1017 + else: 1018 + itm = item 1019 + to_check = set(itm.keys()) 1020 + difference = to_check.difference(template) 1021 + if difference: 1022 + err = f"Invalid keys present in item {pos + 1}: {' '.join(i for i in difference)}" 1023 + raise ze.InvalidItemFieldsError(err) 1024 + processed_items.append(itm) 1025 + return processed_items 1026 + 1027 + @tcache 1028 + def 
item_types(self): 1029 + """Get all available item types.""" 1030 + # Check for a valid cached version 1031 + params = {"locale": self.locale} 1032 + query_string = "/itemTypes" 1033 + return query_string, params 1034 + 1035 + @tcache 1036 + def creator_fields(self): 1037 + """Get localised creator fields.""" 1038 + # Check for a valid cached version 1039 + params = {"locale": self.locale} 1040 + query_string = "/creatorFields" 1041 + return query_string, params 1042 + 1043 + @tcache 1044 + def item_type_fields(self, itemtype): 1045 + """Get all valid fields for an item.""" 1046 + params = {"itemType": itemtype, "locale": self.locale} 1047 + query_string = "/itemTypeFields" 1048 + return query_string, params 1049 + 1050 + @tcache 1051 + def item_creator_types(self, itemtype): 1052 + """Get all available creator types for an item.""" 1053 + params = {"itemType": itemtype, "locale": self.locale} 1054 + query_string = "/itemTypeCreatorTypes" 1055 + return query_string, params 1056 + 1057 + @tcache 1058 + def item_fields(self): 1059 + """Get all available item fields.""" 1060 + # Check for a valid cached version 1061 + params = {"locale": self.locale} 1062 + query_string = "/itemFields" 1063 + return query_string, params 1064 + 1065 + def item_attachment_link_modes(): 1066 + """Get all available link mode types. 1067 + 1068 + Note: No viable REST API route was found for this, so I tested and built 1069 + a list from documentation found here: 1070 + https://www.zotero.org/support/dev/web_api/json 1071 + """ 1072 + return ["imported_file", "imported_url", "linked_file", "linked_url"] 1073 + 1074 + def create_items(self, payload, parentid=None, last_modified=None): 1075 + """Create new Zotero items. 1076 + 1077 + Accepts two arguments: 1078 + a list containing one or more item dicts 1079 + an optional parent item ID. 1080 + Note that this can also be used to update existing items. 
1081 + """ 1082 + if len(payload) > DEFAULT_NUM_ITEMS: 1083 + msg = f"You may only create up to {DEFAULT_NUM_ITEMS} items per call" 1084 + raise ze.TooManyItemsError(msg) 1085 + # TODO: strip extra data if it's an existing item 1086 + headers = {"Zotero-Write-Token": token(), "Content-Type": "application/json"} 1087 + if last_modified is not None: 1088 + headers["If-Unmodified-Since-Version"] = str(last_modified) 1089 + to_send = list(self._cleanup(*payload, allow=("key"))) 1090 + self._check_backoff() 1091 + req = self.client.post( 1092 + url=build_url( 1093 + self.endpoint, 1094 + f"/{self.library_type}/{self.library_id}/items", 1095 + ), 1096 + content=json.dumps(to_send), 1097 + headers=dict(headers), 1098 + ) 1099 + self.request = req 1100 + try: 1101 + req.raise_for_status() 1102 + except httpx.HTTPError as exc: 1103 + error_handler(self, req, exc) 1104 + resp = req.json() 1105 + backoff = self.request.headers.get("backoff") or self.request.headers.get( 1106 + "retry-after", 1107 + ) 1108 + if backoff: 1109 + self._set_backoff(backoff) 1110 + if parentid: 1111 + # we need to create child items using PATCH 1112 + # TODO: handle possibility of item creation + failed parent attachment 1113 + uheaders = { 1114 + "If-Unmodified-Since-Version": req.headers["last-modified-version"], 1115 + } 1116 + for value in resp["success"].values(): 1117 + payload = {"parentItem": parentid} 1118 + self._check_backoff() 1119 + presp = self.client.patch( 1120 + url=build_url( 1121 + self.endpoint, 1122 + f"/{self.library_type}/{self.library_id}/items/{value}", 1123 + ), 1124 + json=payload, 1125 + headers=dict(uheaders), 1126 + ) 1127 + self.request = presp 1128 + try: 1129 + presp.raise_for_status() 1130 + except httpx.HTTPError as exc: 1131 + error_handler(self, presp, exc) 1132 + backoff = presp.headers.get("backoff") or presp.headers.get( 1133 + "retry-after", 1134 + ) 1135 + if backoff: 1136 + self._set_backoff(backoff) 1137 + return resp 1138 + 1139 + def 
create_collection(self, payload, last_modified=None): 1140 + """Alias for create_collections to preserve backward compatibility.""" 1141 + return self.create_collections(payload, last_modified) 1142 + 1143 + def create_collections(self, payload, last_modified=None): 1144 + """Create new Zotero collections. 1145 + 1146 + Accepts one argument, a list of dicts containing the following keys: 1147 + 'name': the name of the collection 1148 + 'parentCollection': OPTIONAL, the parent collection to which you wish to add this 1149 + """ 1150 + # no point in proceeding if there's no 'name' key 1151 + for item in payload: 1152 + if "name" not in item: 1153 + msg = "The dict you pass must include a 'name' key" 1154 + raise ze.ParamNotPassedError(msg) 1155 + # add a blank 'parentCollection' key if it hasn't been passed 1156 + if "parentCollection" not in item: 1157 + item["parentCollection"] = "" 1158 + headers = {"Zotero-Write-Token": token()} 1159 + if last_modified is not None: 1160 + headers["If-Unmodified-Since-Version"] = str(last_modified) 1161 + self._check_backoff() 1162 + req = self.client.post( 1163 + url=build_url( 1164 + self.endpoint, 1165 + f"/{self.library_type}/{self.library_id}/collections", 1166 + ), 1167 + headers=headers, 1168 + content=json.dumps(payload), 1169 + ) 1170 + self.request = req 1171 + try: 1172 + req.raise_for_status() 1173 + except httpx.HTTPError as exc: 1174 + error_handler(self, req, exc) 1175 + backoff = req.headers.get("backoff") or req.headers.get("retry-after") 1176 + if backoff: 1177 + self._set_backoff(backoff) 1178 + return req.json() 1179 + 1180 + @backoff_check 1181 + def update_collection(self, payload, last_modified=None): 1182 + """Update a Zotero collection property such as 'name'. 1183 + 1184 + Accepts one argument, a dict containing collection data retrieved 1185 + using e.g. 'collections()'. 
1186 + """ 1187 + modified = payload["version"] 1188 + if last_modified is not None: 1189 + modified = last_modified 1190 + key = payload["key"] 1191 + headers = {"If-Unmodified-Since-Version": str(modified)} 1192 + headers.update({"Content-Type": "application/json"}) 1193 + return self.client.put( 1194 + url=build_url( 1195 + self.endpoint, 1196 + f"/{self.library_type}/{self.library_id}/collections/{key}", 1197 + ), 1198 + headers=headers, 1199 + content=json.dumps(payload), 1200 + ) 1201 + 1202 + def attachment_simple(self, files, parentid=None): 1203 + """Add attachments using filenames as title. 1204 + 1205 + Args: 1206 + files: One or more file paths to add as attachments. 1207 + parentid: Optional Item ID to create child attachments. 1208 + 1209 + """ 1210 + orig = self._attachment_template("imported_file") 1211 + to_add = [orig.copy() for fls in files] 1212 + for idx, tmplt in enumerate(to_add): 1213 + tmplt["title"] = Path(files[idx]).name 1214 + tmplt["filename"] = files[idx] 1215 + if parentid: 1216 + return self._attachment(to_add, parentid) 1217 + return self._attachment(to_add) 1218 + 1219 + def attachment_both(self, files, parentid=None): 1220 + """Add child attachments using title, filename. 1221 + 1222 + Args: 1223 + files: One or more lists or tuples containing (title, file path). 1224 + parentid: Optional Item ID to create child attachments. 1225 + 1226 + """ 1227 + orig = self._attachment_template("imported_file") 1228 + to_add = [orig.copy() for f in files] 1229 + for idx, tmplt in enumerate(to_add): 1230 + tmplt["title"] = files[idx][0] 1231 + tmplt["filename"] = files[idx][1] 1232 + if parentid: 1233 + return self._attachment(to_add, parentid) 1234 + return self._attachment(to_add) 1235 + 1236 + @backoff_check 1237 + def update_item(self, payload, last_modified=None): 1238 + """Update an existing item. 1239 + 1240 + Accepts one argument, a dict containing Item data. 
1241 + """ 1242 + to_send = self.check_items([payload])[0] 1243 + modified = payload["version"] if last_modified is None else last_modified 1244 + ident = payload["key"] 1245 + headers = {"If-Unmodified-Since-Version": str(modified)} 1246 + return self.client.patch( 1247 + url=build_url( 1248 + self.endpoint, 1249 + f"/{self.library_type}/{self.library_id}/items/{ident}", 1250 + ), 1251 + headers=headers, 1252 + content=json.dumps(to_send), 1253 + ) 1254 + 1255 + def update_items(self, payload): 1256 + """Update existing items. 1257 + 1258 + Accepts one argument, a list of dicts containing Item data. 1259 + """ 1260 + to_send = [self.check_items([p])[0] for p in payload] 1261 + # the API only accepts 50 items at a time, so we have to split anything longer 1262 + for chunk in chunks(to_send, DEFAULT_NUM_ITEMS): 1263 + self._check_backoff() 1264 + req = self.client.post( 1265 + url=build_url( 1266 + self.endpoint, 1267 + f"/{self.library_type}/{self.library_id}/items/", 1268 + ), 1269 + json=chunk, 1270 + ) 1271 + self.request = req 1272 + try: 1273 + req.raise_for_status() 1274 + except httpx.HTTPError as exc: 1275 + error_handler(self, req, exc) 1276 + backoff = req.headers.get("backoff") or req.headers.get("retry-after") 1277 + if backoff: 1278 + self._set_backoff(backoff) 1279 + return True 1280 + 1281 + def update_collections(self, payload): 1282 + """Update existing collections. 1283 + 1284 + Accepts one argument, a list of dicts containing Collection data. 
1285 + """ 1286 + to_send = [self.check_items([p])[0] for p in payload] 1287 + # the API only accepts 50 items at a time, so we have to split anything longer 1288 + for chunk in chunks(to_send, DEFAULT_NUM_ITEMS): 1289 + self._check_backoff() 1290 + req = self.client.post( 1291 + url=build_url( 1292 + self.endpoint, 1293 + f"/{self.library_type}/{self.library_id}/collections/", 1294 + ), 1295 + json=chunk, 1296 + ) 1297 + self.request = req 1298 + try: 1299 + req.raise_for_status() 1300 + except httpx.HTTPError as exc: 1301 + error_handler(self, req, exc) 1302 + backoff = req.headers.get("backoff") or req.headers.get("retry-after") 1303 + if backoff: 1304 + self._set_backoff(backoff) 1305 + return True 1306 + 1307 + @backoff_check 1308 + def addto_collection(self, collection, payload): 1309 + """Add item to a collection. 1310 + 1311 + Accepts two arguments: The collection ID, and an item dict. 1312 + """ 1313 + ident = payload["key"] 1314 + modified = payload["version"] 1315 + # add the collection data from the item 1316 + modified_collections = payload["data"]["collections"] + [collection] 1317 + headers = {"If-Unmodified-Since-Version": str(modified)} 1318 + return self.client.patch( 1319 + url=build_url( 1320 + self.endpoint, 1321 + f"/{self.library_type}/{self.library_id}/items/{ident}", 1322 + ), 1323 + json={"collections": modified_collections}, 1324 + headers=headers, 1325 + ) 1326 + 1327 + @backoff_check 1328 + def deletefrom_collection(self, collection, payload): 1329 + """Delete an item from a collection. 1330 + 1331 + Accepts two arguments: The collection ID, and an item dict. 
1332 + """ 1333 + ident = payload["key"] 1334 + modified = payload["version"] 1335 + # strip the collection data from the item 1336 + modified_collections = [ 1337 + c for c in payload["data"]["collections"] if c != collection 1338 + ] 1339 + headers = {"If-Unmodified-Since-Version": str(modified)} 1340 + return self.client.patch( 1341 + url=build_url( 1342 + self.endpoint, 1343 + f"/{self.library_type}/{self.library_id}/items/{ident}", 1344 + ), 1345 + json={"collections": modified_collections}, 1346 + headers=headers, 1347 + ) 1348 + 1349 + @backoff_check 1350 + def delete_tags(self, *payload): 1351 + """Delete a group of tags. 1352 + 1353 + Pass in up to 50 tags, or use *[tags]. 1354 + """ 1355 + if len(payload) > DEFAULT_NUM_ITEMS: 1356 + msg = f"Only {DEFAULT_NUM_ITEMS} tags or fewer may be deleted" 1357 + raise ze.TooManyItemsError(msg) 1358 + modified_tags = " || ".join(list(payload)) 1359 + # first, get version data by getting one tag 1360 + self.tags(limit=1) 1361 + headers = { 1362 + "If-Unmodified-Since-Version": self.request.headers[ 1363 + "last-modified-version" 1364 + ], 1365 + } 1366 + return self.client.delete( 1367 + url=build_url( 1368 + self.endpoint, 1369 + f"/{self.library_type}/{self.library_id}/tags", 1370 + ), 1371 + params={"tag": modified_tags}, 1372 + headers=headers, 1373 + ) 1374 + 1375 + @backoff_check 1376 + def delete_item(self, payload, last_modified=None): 1377 + """Delete Items from a Zotero library. 
1378 + 1379 + Accepts a single argument: 1380 + a dict containing item data 1381 + OR a list of dicts containing item data 1382 + """ 1383 + params = None 1384 + if isinstance(payload, list): 1385 + params = {"itemKey": ",".join([p["key"] for p in payload])} 1386 + if last_modified is not None: 1387 + modified = last_modified 1388 + else: 1389 + modified = payload[0]["version"] 1390 + url = build_url( 1391 + self.endpoint, 1392 + f"/{self.library_type}/{self.library_id}/items", 1393 + ) 1394 + else: 1395 + ident = payload["key"] 1396 + if last_modified is not None: 1397 + modified = last_modified 1398 + else: 1399 + modified = payload["version"] 1400 + url = build_url( 1401 + self.endpoint, 1402 + f"/{self.library_type}/{self.library_id}/items/{ident}", 1403 + ) 1404 + headers = {"If-Unmodified-Since-Version": str(modified)} 1405 + return self.client.delete(url=url, params=params, headers=headers) 1406 + 1407 + @backoff_check 1408 + def delete_collection(self, payload, last_modified=None): 1409 + """Delete a Collection from a Zotero library. 
1410 + 1411 + Accepts a single argument: 1412 + a dict containing item data 1413 + OR a list of dicts containing item data 1414 + """ 1415 + params = None 1416 + if isinstance(payload, list): 1417 + params = {"collectionKey": ",".join([p["key"] for p in payload])} 1418 + if last_modified is not None: 1419 + modified = last_modified 1420 + else: 1421 + modified = payload[0]["version"] 1422 + url = build_url( 1423 + self.endpoint, 1424 + f"/{self.library_type}/{self.library_id}/collections", 1425 + ) 1426 + else: 1427 + ident = payload["key"] 1428 + if last_modified is not None: 1429 + modified = last_modified 1430 + else: 1431 + modified = payload["version"] 1432 + url = build_url( 1433 + self.endpoint, 1434 + f"/{self.library_type}/{self.library_id}/collections/{ident}", 1435 + ) 1436 + headers = {"If-Unmodified-Since-Version": str(modified)} 1437 + return self.client.delete(url=url, params=params, headers=headers) 1438 + 1439 + 1440 + __all__ = ["Zotero"]
+195
src/pyzotero/_decorators.py
··· 1 + """Decorator functions for Pyzotero. 2 + 3 + These decorators handle caching, backoff, and response processing for API calls. 4 + They are tightly coupled with the Zotero class and are internal implementation details. 5 + """ 6 + 7 + from __future__ import annotations 8 + 9 + import io 10 + import zipfile 11 + from functools import wraps 12 + from typing import TYPE_CHECKING, Any 13 + from urllib.parse import urlparse 14 + 15 + import bibtexparser 16 + import feedparser 17 + import httpx 18 + from httpx import Request 19 + 20 + from ._utils import DEFAULT_TIMEOUT, build_url 21 + from .errors import error_handler 22 + 23 + if TYPE_CHECKING: 24 + from collections.abc import Callable 25 + 26 + 27 + def cleanwrap(func: Callable) -> Callable: 28 + """Wrap for Zotero._cleanup to process multiple items.""" 29 + 30 + def enc(self, *args, **kwargs): 31 + """Send each item to _cleanup().""" 32 + return (func(self, item, **kwargs) for item in args) 33 + 34 + return enc 35 + 36 + 37 + def tcache(func: Callable) -> Callable: 38 + """Handle URL building and caching for template functions.""" 39 + 40 + @wraps(func) 41 + def wrapped_f(self, *args, **kwargs): 42 + """Call the decorated function to get query string and params, 43 + builds URL, retrieves template, caches result, and returns template. 
44 + """ 45 + query_string, params = func(self, *args, **kwargs) 46 + params["timeout"] = DEFAULT_TIMEOUT 47 + r = Request( 48 + "GET", 49 + build_url(self.endpoint, query_string), 50 + params=params, 51 + ) 52 + with httpx.Client() as client: 53 + response = client.send(r) 54 + 55 + # now split up the URL 56 + result = urlparse(str(response.url)) 57 + # construct cache key 58 + cachekey = f"{result.path}_{result.query}" 59 + if self.templates.get(cachekey) and not self._updated( 60 + query_string, 61 + self.templates[cachekey], 62 + cachekey, 63 + ): 64 + return self.templates[cachekey]["tmplt"] 65 + # otherwise perform a normal request and cache the response 66 + retrieved = self._retrieve_data(query_string, params=params) 67 + return self._cache(retrieved, cachekey) 68 + 69 + return wrapped_f 70 + 71 + 72 + def backoff_check(func: Callable) -> Callable: 73 + """Perform backoff processing for write operations. 74 + 75 + func must return a Requests GET / POST / PUT / PATCH / DELETE etc. 76 + This is intercepted: we first check for an active backoff 77 + and wait if need be. 78 + After the response is received, we do normal error checking 79 + and set a new backoff if necessary, before returning. 80 + 81 + Use with functions that are intended to return True. 
82 + """ 83 + 84 + @wraps(func) 85 + def wrapped_f(self, *args, **kwargs): 86 + self._check_backoff() 87 + # resp is a Requests response object 88 + resp = func(self, *args, **kwargs) 89 + try: 90 + resp.raise_for_status() 91 + except httpx.HTTPError as exc: 92 + error_handler(self, resp, exc) 93 + self.request = resp 94 + backoff = resp.headers.get("backoff") or resp.headers.get("retry-after") 95 + if backoff: 96 + self._set_backoff(backoff) 97 + 98 + return True 99 + 100 + return wrapped_f 101 + 102 + 103 + def retrieve(func: Callable) -> Callable: 104 + """Call _retrieve_data() and pass the result to the correct processor.""" 105 + 106 + @wraps(func) 107 + def wrapped_f(self, *args, **kwargs) -> Any: 108 + """Return result of _retrieve_data(). 109 + 110 + func's return value is part of a URI, and it's this 111 + which is intercepted and passed to _retrieve_data: 112 + '/users/123/items?key=abc123' 113 + """ 114 + if kwargs: 115 + self.add_parameters(**kwargs) 116 + retrieved = self._retrieve_data(func(self, *args)) 117 + # we now always have links in the header response 118 + self.links = self._extract_links() 119 + # determine content and format, based on url params 120 + content = ( 121 + self.content.search(str(self.request.url)) 122 + and self.content.search(str(self.request.url)).group(0) 123 + ) or "bib" 124 + # select format, or assume JSON 125 + content_type_header = self.request.headers["Content-Type"].lower() + ";" 126 + fmt = self.formats.get( 127 + # strip "; charset=..." 
segment 128 + content_type_header[0 : content_type_header.index(";")], 129 + "json", 130 + ) 131 + # clear all query parameters 132 + self.url_params = None 133 + # Zotero API returns plain-text attachments as zipped content 134 + # We can inspect the redirect header to check whether Zotero compressed the file 135 + if fmt == "zip": 136 + if ( 137 + self.request.history 138 + and self.request.history[0].headers.get("Zotero-File-Compressed") 139 + == "Yes" 140 + ): 141 + z = zipfile.ZipFile(io.BytesIO(retrieved.content)) 142 + namelist = z.namelist() 143 + file = z.read(namelist[0]) 144 + else: 145 + file = retrieved.content 146 + return file 147 + # check to see whether it's tag data 148 + if "tags" in str(self.request.url): 149 + self.tag_data = False 150 + return self._tags_data(retrieved.json()) 151 + if fmt == "atom": 152 + parsed = feedparser.parse(retrieved.text) 153 + # select the correct processor 154 + processor = self.processors.get(content) 155 + # process the content correctly with a custom rule 156 + return processor(parsed) 157 + if fmt == "snapshot": 158 + # we need to dump as a zip! 
159 + self.snapshot = True 160 + if fmt == "bibtex": 161 + parser = bibtexparser.bparser.BibTexParser( 162 + common_strings=True, 163 + ignore_nonstandard_types=False, 164 + ) 165 + return parser.parse(retrieved.text) 166 + # it's binary, so return raw content 167 + if fmt != "json": 168 + return retrieved.content 169 + # no need to do anything special, return JSON 170 + return retrieved.json() 171 + 172 + return wrapped_f 173 + 174 + 175 + def ss_wrap(func: Callable) -> Callable: 176 + """Ensure that a SavedSearch object exists before method execution.""" 177 + 178 + def wrapper(self, *args, **kwargs): 179 + if not self.savedsearch: 180 + # Import here to avoid circular imports 181 + from ._search import SavedSearch # noqa: PLC0415 182 + 183 + self.savedsearch = SavedSearch(self) 184 + return func(self, *args, **kwargs) 185 + 186 + return wrapper 187 + 188 + 189 + __all__ = [ 190 + "backoff_check", 191 + "cleanwrap", 192 + "retrieve", 193 + "ss_wrap", 194 + "tcache", 195 + ]
+190
src/pyzotero/_search.py
··· 1 + """Saved search functionality for Pyzotero. 2 + 3 + This module contains the SavedSearch class for creating and managing 4 + Zotero saved searches. 5 + """ 6 + 7 + from __future__ import annotations 8 + 9 + from typing import TYPE_CHECKING 10 + 11 + from . import errors as ze 12 + 13 + if TYPE_CHECKING: 14 + from ._client import Zotero 15 + 16 + 17 + class SavedSearch: 18 + """Saved search functionality. 19 + 20 + See https://github.com/zotero/zotero/blob/master/chrome/content/zotero/xpcom/data/searchConditions.js 21 + """ 22 + 23 + def __init__(self, zinstance: Zotero) -> None: 24 + super().__init__() 25 + self.zinstance = zinstance 26 + self.searchkeys = ("condition", "operator", "value") 27 + # always exclude these fields from zotero.item_keys() 28 + self.excluded_items = ( 29 + "accessDate", 30 + "date", 31 + "pages", 32 + "section", 33 + "seriesNumber", 34 + "issue", 35 + ) 36 + self.operators = { 37 + "is": "is", 38 + "isNot": "isNot", 39 + "beginsWith": "beginsWith", 40 + "contains": "contains", 41 + "doesNotContain": "doesNotContain", 42 + "isLessThan": "isLessThan", 43 + "isGreaterThan": "isGreaterThan", 44 + "isBefore": "isBefore", 45 + "isAfter": "isAfter", 46 + "isInTheLast": "isInTheLast", 47 + "any": "any", 48 + "all": "all", 49 + "true": "true", 50 + "false": "false", 51 + } 52 + # common groupings of operators 53 + self.groups = { 54 + "A": (self.operators["true"], self.operators["false"]), 55 + "B": (self.operators["any"], self.operators["all"]), 56 + "C": ( 57 + self.operators["is"], 58 + self.operators["isNot"], 59 + self.operators["contains"], 60 + self.operators["doesNotContain"], 61 + ), 62 + "D": (self.operators["is"], self.operators["isNot"]), 63 + "E": ( 64 + self.operators["is"], 65 + self.operators["isNot"], 66 + self.operators["isBefore"], 67 + self.operators["isInTheLast"], 68 + ), 69 + "F": (self.operators["contains"], self.operators["doesNotContain"]), 70 + "G": ( 71 + self.operators["is"], 72 + self.operators["isNot"], 73 + 
self.operators["contains"], 74 + self.operators["doesNotContain"], 75 + self.operators["isLessThan"], 76 + self.operators["isGreaterThan"], 77 + ), 78 + "H": ( 79 + self.operators["is"], 80 + self.operators["isNot"], 81 + self.operators["beginsWith"], 82 + ), 83 + "I": (self.operators["is"]), 84 + } 85 + self.conditions_operators = { 86 + "deleted": self.groups["A"], 87 + "noChildren": self.groups["A"], 88 + "unfiled": self.groups["A"], 89 + "publications": self.groups["A"], 90 + "retracted": self.groups["A"], 91 + "includeParentsAndChildren": self.groups["A"], 92 + "includeParents": self.groups["A"], 93 + "includeChildren": self.groups["A"], 94 + "recursive": self.groups["A"], 95 + "joinMode": self.groups["B"], 96 + "quicksearch-titleCreatorYear": self.groups["C"], 97 + "quicksearch-titleCreatorYearNote": self.groups["C"], 98 + "quicksearch-fields": self.groups["C"], 99 + "quicksearch-everything": self.groups["C"], 100 + "collectionID": self.groups["D"], 101 + "savedSearchID": self.groups["D"], 102 + "collection": self.groups["D"], 103 + "savedSearch": self.groups["D"], 104 + "dateAdded": self.groups["E"], 105 + "dateModified": self.groups["E"], 106 + "itemType": self.groups["D"], 107 + "fileTypeID": self.groups["D"], 108 + "tagID": self.groups["D"], 109 + "tag": self.groups["C"], 110 + "note": self.groups["F"], 111 + "childNote": self.groups["F"], 112 + "creator": self.groups["C"], 113 + "lastName": self.groups["C"], 114 + "field": self.groups["C"], 115 + "datefield": self.groups["E"], 116 + "year": self.groups["C"], 117 + "numberfield": self.groups["G"], 118 + "libraryID": self.groups["D"], 119 + "key": self.groups["H"], 120 + "itemID": self.groups["D"], 121 + "annotationText": self.groups["F"], 122 + "annotationComment": self.groups["F"], 123 + "fulltextWord": self.groups["F"], 124 + "fulltextContent": self.groups["F"], 125 + "tempTable": self.groups["I"], 126 + } 127 + ########### 128 + # ALIASES # 129 + ########### 130 + # aliases for numberfield 131 + 
pagefields = ( 132 + "pages", 133 + "numPages", 134 + "numberOfVolumes", 135 + "section", 136 + "seriesNumber", 137 + "issue", 138 + ) 139 + for pf in pagefields: 140 + self.conditions_operators[pf] = self.conditions_operators.get("numberfield") 141 + # aliases for datefield 142 + datefields = ("accessDate", "date", "dateDue", "accepted") 143 + for df in datefields: 144 + self.conditions_operators[df] = self.conditions_operators.get("datefield") 145 + # aliases for field - this makes a blocking API call unless item types have been cached 146 + item_fields = [ 147 + itm["field"] 148 + for itm in self.zinstance.item_fields() 149 + if itm["field"] not in set(self.excluded_items) 150 + ] 151 + for itf in item_fields: 152 + self.conditions_operators[itf] = self.conditions_operators.get("field") 153 + 154 + def _validate(self, conditions: list[dict]) -> None: 155 + """Validate saved search conditions. 156 + 157 + Raises an error if any contain invalid operators. 158 + """ 159 + allowed_keys = set(self.searchkeys) 160 + operators_set = set(self.operators.keys()) 161 + for condition in conditions: 162 + if set(condition.keys()) != allowed_keys: 163 + msg = f"Keys must be all of: {', '.join(self.searchkeys)}" 164 + raise ze.ParamNotPassedError(msg) 165 + if condition.get("operator") not in operators_set: 166 + msg = f"You have specified an unknown operator: {condition.get('operator')}" 167 + raise ze.ParamNotPassedError(msg) 168 + # dict keys of allowed operators for the current condition 169 + permitted_operators = self.conditions_operators.get( 170 + condition.get("condition"), 171 + ) 172 + if permitted_operators is None: 173 + msg = f"Unknown condition: {condition.get('condition')}" 174 + raise ze.ParamNotPassedError(msg) 175 + # transform these into values 176 + permitted_operators_list = { 177 + op_value 178 + for op in permitted_operators 179 + if (op_value := self.operators.get(op)) is not None 180 + } 181 + if condition.get("operator") not in 
permitted_operators_list: 182 + msg = ( 183 + f"You may not use the '{condition.get('operator')}' operator when " 184 + f"selecting the '{condition.get('condition')}' condition. \n" 185 + f"Allowed operators: {', '.join(list(permitted_operators_list))}" 186 + ) 187 + raise ze.ParamNotPassedError(msg) 188 + 189 + 190 + __all__ = ["SavedSearch"]
+248
src/pyzotero/_upload.py
··· 1 + """File upload functionality for Pyzotero. 2 + 3 + This module contains the Zupload class for handling file attachments 4 + and uploads to the Zotero API. 5 + """ 6 + 7 + from __future__ import annotations 8 + 9 + import hashlib 10 + import json 11 + import mimetypes 12 + from pathlib import Path 13 + from typing import TYPE_CHECKING, Any 14 + 15 + import httpx 16 + 17 + import pyzotero as pz 18 + 19 + from . import errors as ze 20 + from ._utils import build_url, token 21 + from .errors import error_handler 22 + 23 + if TYPE_CHECKING: 24 + from ._client import Zotero 25 + 26 + 27 + class Zupload: 28 + """Zotero file attachment helper. 29 + 30 + Receives a Zotero instance, file(s) to upload, and optional parent ID. 31 + """ 32 + 33 + def __init__( 34 + self, 35 + zinstance: Zotero, 36 + payload: list[dict], 37 + parentid: str | None = None, 38 + basedir: str | Path | None = None, 39 + ) -> None: 40 + super().__init__() 41 + self.zinstance = zinstance 42 + self.payload = payload 43 + self.parentid = parentid 44 + if basedir is None: 45 + self.basedir = Path() 46 + elif isinstance(basedir, Path): 47 + self.basedir = basedir 48 + else: 49 + self.basedir = Path(basedir) 50 + 51 + def _verify(self, payload: list[dict]) -> None: 52 + """Ensure that all files to be attached exist. 53 + 54 + open()'s better than exists(), cos it avoids a race condition. 55 + """ 56 + if not payload: # Check payload has nonzero length 57 + raise ze.ParamNotPassedError 58 + for templt in payload: 59 + if Path(str(self.basedir.joinpath(templt["filename"]))).is_file(): 60 + try: 61 + # if it is a file, try to open it, and catch the error 62 + with Path(str(self.basedir.joinpath(templt["filename"]))).open(): 63 + pass 64 + except OSError: 65 + msg = f"The file at {self.basedir.joinpath(templt['filename'])!s} couldn't be opened or found." 
66 + raise ze.FileDoesNotExistError(msg) from None 67 + # no point in continuing if the file isn't a file 68 + else: 69 + msg = f"The file at {self.basedir.joinpath(templt['filename'])!s} couldn't be opened or found." 70 + raise ze.FileDoesNotExistError(msg) 71 + 72 + def _create_prelim(self) -> dict | None: 73 + """Step 0: Register intent to upload files.""" 74 + self._verify(self.payload) 75 + if "key" in self.payload[0] and self.payload[0]["key"]: 76 + if next((i for i in self.payload if "key" not in i), False): 77 + msg = "Can't pass payload entries with and without keys to Zupload" 78 + raise ze.UnsupportedParamsError(msg) 79 + return None # Don't do anything if payload comes with keys 80 + # Set contentType for each attachment if not already provided 81 + for item in self.payload: 82 + if not item.get("contentType"): 83 + filepath = str(self.basedir.joinpath(item["filename"])) 84 + detected_type = mimetypes.guess_type(filepath)[0] 85 + item["contentType"] = detected_type or "application/octet-stream" 86 + liblevel = "/{t}/{u}/items" 87 + # Create one or more new attachments 88 + headers = {"Zotero-Write-Token": token(), "Content-Type": "application/json"} 89 + # If we have a Parent ID, add it as a parentItem 90 + if self.parentid: 91 + for child in self.payload: 92 + child["parentItem"] = self.parentid 93 + to_send = json.dumps(self.payload) 94 + self.zinstance._check_backoff() 95 + req = self.zinstance.client.post( 96 + url=build_url( 97 + self.zinstance.endpoint, 98 + liblevel.format( 99 + t=self.zinstance.library_type, 100 + u=self.zinstance.library_id, 101 + ), 102 + ), 103 + content=to_send, 104 + headers=headers, 105 + ) 106 + try: 107 + req.raise_for_status() 108 + except httpx.HTTPError as exc: 109 + error_handler(self.zinstance, req, exc) 110 + backoff = req.headers.get("backoff") or req.headers.get("retry-after") 111 + if backoff: 112 + self.zinstance._set_backoff(backoff) 113 + data = req.json() 114 + for k in data["success"]: 115 + 
self.payload[int(k)]["key"] = data["success"][k] 116 + return data 117 + 118 + def _get_auth( 119 + self, attachment: str, reg_key: str, md5: str | None = None 120 + ) -> dict[str, Any]: 121 + """Step 1: get upload authorisation for a file.""" 122 + mtypes = mimetypes.guess_type(attachment) 123 + digest = hashlib.md5() # noqa: S324 124 + with Path(attachment).open("rb") as att: 125 + for chunk in iter(lambda: att.read(8192), b""): 126 + digest.update(chunk) 127 + auth_headers = {"Content-Type": "application/x-www-form-urlencoded"} 128 + if not md5: 129 + auth_headers["If-None-Match"] = "*" 130 + else: 131 + # docs specify that for existing file we use this 132 + auth_headers["If-Match"] = md5 133 + data = { 134 + "md5": digest.hexdigest(), 135 + "filename": Path(attachment).name, 136 + "filesize": Path(attachment).stat().st_size, 137 + "mtime": str(int(Path(attachment).stat().st_mtime * 1000)), 138 + "contentType": mtypes[0] or "application/octet-stream", 139 + "charset": mtypes[1], 140 + "params": 1, 141 + } 142 + self.zinstance._check_backoff() 143 + auth_req = self.zinstance.client.post( 144 + url=build_url( 145 + self.zinstance.endpoint, 146 + f"/{self.zinstance.library_type}/{self.zinstance.library_id}/items/{reg_key}/file", 147 + ), 148 + data=data, 149 + headers=auth_headers, 150 + ) 151 + try: 152 + auth_req.raise_for_status() 153 + except httpx.HTTPError as exc: 154 + error_handler(self.zinstance, auth_req, exc) 155 + backoff = auth_req.headers.get("backoff") or auth_req.headers.get("retry-after") 156 + if backoff: 157 + self.zinstance._set_backoff(backoff) 158 + return auth_req.json() 159 + 160 + def _upload_file( 161 + self, authdata: dict[str, Any], attachment: str, reg_key: str 162 + ) -> None: 163 + """Step 2: auth successful, and file not on server. 164 + 165 + See zotero.org/support/dev/server_api/file_upload#a_full_upload 166 + 167 + reg_key isn't used, but we need to pass it through to Step 3. 
168 + """ 169 + upload_dict = authdata["params"] 170 + # pass tuple of tuples (not dict!), to ensure key comes first 171 + upload_list = [("key", upload_dict.pop("key"))] 172 + for key, value in upload_dict.items(): 173 + upload_list.append((key, value)) 174 + upload_list.append(("file", Path(attachment).open("rb").read())) 175 + upload_pairs = tuple(upload_list) 176 + try: 177 + self.zinstance._check_backoff() 178 + # We use a fresh httpx POST because we don't want our existing Pyzotero headers 179 + # for a call to the storage upload URL (currently S3) 180 + upload = httpx.post( 181 + url=authdata["url"], 182 + files=upload_pairs, 183 + headers={"User-Agent": f"Pyzotero/{pz.__version__}"}, 184 + ) 185 + except httpx.ConnectError: 186 + msg = "ConnectionError" 187 + raise ze.UploadError(msg) from None 188 + try: 189 + upload.raise_for_status() 190 + except httpx.HTTPError as exc: 191 + error_handler(self.zinstance, upload, exc) 192 + backoff = upload.headers.get("backoff") or upload.headers.get("retry-after") 193 + if backoff: 194 + self.zinstance._set_backoff(backoff) 195 + # now check the responses 196 + return self._register_upload(authdata, reg_key) 197 + 198 + def _register_upload(self, authdata: dict[str, Any], reg_key: str) -> None: 199 + """Step 3: upload successful, so register it.""" 200 + reg_headers = { 201 + "Content-Type": "application/x-www-form-urlencoded", 202 + "If-None-Match": "*", 203 + } 204 + reg_data = {"upload": authdata.get("uploadKey")} 205 + self.zinstance._check_backoff() 206 + upload_reg = self.zinstance.client.post( 207 + url=build_url( 208 + self.zinstance.endpoint, 209 + f"/{self.zinstance.library_type}/{self.zinstance.library_id}/items/{reg_key}/file", 210 + ), 211 + data=reg_data, 212 + headers=dict(reg_headers), 213 + ) 214 + try: 215 + upload_reg.raise_for_status() 216 + except httpx.HTTPError as exc: 217 + error_handler(self.zinstance, upload_reg, exc) 218 + backoff = upload_reg.headers.get("backoff") or upload_reg.headers.get( 
219 + "retry-after", 220 + ) 221 + if backoff: 222 + self.zinstance._set_backoff(backoff) 223 + 224 + def upload(self) -> dict[str, list]: 225 + """File upload functionality. 226 + 227 + Goes through upload steps 0 - 3 (private class methods), and returns 228 + a dict noting success, failure, or unchanged 229 + (returning the payload entries with that property as a list for each status). 230 + """ 231 + result: dict[str, list] = {"success": [], "failure": [], "unchanged": []} 232 + self._create_prelim() 233 + for item in self.payload: 234 + if "key" not in item: 235 + result["failure"].append(item) 236 + continue 237 + attach = str(self.basedir.joinpath(item["filename"])) 238 + authdata = self._get_auth(attach, item["key"], md5=item.get("md5", None)) 239 + # no need to keep going if the file exists 240 + if authdata.get("exists"): 241 + result["unchanged"].append(item) 242 + continue 243 + self._upload_file(authdata, attach, item["key"]) 244 + result["success"].append(item) 245 + return result 246 + 247 + 248 + __all__ = ["Zupload"]
+76
src/pyzotero/_utils.py
··· 1 + """Utility functions for Pyzotero. 2 + 3 + This module contains helper functions used throughout the library. 4 + """ 5 + 6 + from __future__ import annotations 7 + 8 + import uuid 9 + from collections.abc import Iterator 10 + from pathlib import PurePosixPath 11 + from typing import TypeVar 12 + from urllib.parse import parse_qs, urlencode, urlparse, urlunparse 13 + 14 + # Avoid hanging the application if there's no server response 15 + DEFAULT_TIMEOUT = 30 16 + 17 + ONE_HOUR = 3600 18 + DEFAULT_NUM_ITEMS = 50 19 + DEFAULT_ITEM_LIMIT = 100 20 + 21 + T = TypeVar("T") 22 + 23 + 24 + def build_url(base_url: str, path: str, args_dict: dict | None = None) -> str: 25 + """Build a valid URL from base, path, and optional query parameters. 26 + 27 + This avoids string concatenation errors and leading/trailing slash issues. 28 + """ 29 + base_url = base_url.removesuffix("/") 30 + parsed = urlparse(base_url) 31 + new_path = str(PurePosixPath(parsed.path) / path.removeprefix("/")) 32 + if args_dict: 33 + return urlunparse(parsed._replace(path=new_path, query=urlencode(args_dict))) 34 + return urlunparse(parsed._replace(path=new_path)) 35 + 36 + 37 + def merge_params(url: str, params: dict) -> tuple[str, dict]: 38 + """Strip query parameters from URL and merge with provided params. 39 + 40 + Returns a tuple of (base_url, merged_params). 
41 + """ 42 + parsed = urlparse(url) 43 + # Extract query parameters from URL 44 + incoming = parse_qs(parsed.query) 45 + incoming = {k: v[0] for k, v in incoming.items()} 46 + 47 + # Create new params dict by merging 48 + merged = {**incoming, **params} 49 + 50 + # Get base URL by zeroing out the query component 51 + base_url = urlunparse(parsed._replace(query="")) 52 + 53 + return base_url, merged 54 + 55 + 56 + def token() -> str: 57 + """Return a unique 32-char write-token.""" 58 + return str(uuid.uuid4().hex) 59 + 60 + 61 + def chunks(iterable: list[T], n: int) -> Iterator[list[T]]: 62 + """Yield successive n-sized chunks from an iterable.""" 63 + for i in range(0, len(iterable), n): 64 + yield iterable[i : i + n] 65 + 66 + 67 + __all__ = [ 68 + "DEFAULT_ITEM_LIMIT", 69 + "DEFAULT_NUM_ITEMS", 70 + "DEFAULT_TIMEOUT", 71 + "ONE_HOUR", 72 + "build_url", 73 + "chunks", 74 + "merge_params", 75 + "token", 76 + ]
+1 -1
src/pyzotero/cli.py
··· 3 3 import json 4 4 import sys 5 5 6 - import click 6 + import click # ty:ignore[unresolved-import] 7 7 import httpx 8 8 9 9 from pyzotero import __version__, zotero
+183
src/pyzotero/errors.py
··· 1 + """Exception classes and error handling for Pyzotero. 2 + 3 + This module defines all custom exceptions used by the library 4 + and the error_handler function for processing HTTP errors. 5 + """ 6 + 7 + from __future__ import annotations 8 + 9 + from typing import TYPE_CHECKING 10 + 11 + import httpx 12 + 13 + if TYPE_CHECKING: 14 + from typing import Any 15 + 16 + 17 + class PyZoteroError(Exception): 18 + """Generic parent exception for all Pyzotero errors.""" 19 + 20 + 21 + class ParamNotPassedError(PyZoteroError): 22 + """Raised if a parameter which is required isn't passed.""" 23 + 24 + 25 + class CallDoesNotExistError(PyZoteroError): 26 + """Raised if the specified API call doesn't exist.""" 27 + 28 + 29 + class UnsupportedParamsError(PyZoteroError): 30 + """Raised when unsupported parameters are passed.""" 31 + 32 + 33 + class UserNotAuthorisedError(PyZoteroError): 34 + """Raised when the user is not allowed to retrieve the resource.""" 35 + 36 + 37 + class TooManyItemsError(PyZoteroError): 38 + """Raised when too many items are passed to a Write API method.""" 39 + 40 + 41 + class MissingCredentialsError(PyZoteroError): 42 + """Raised when an attempt is made to create a Zotero instance 43 + without providing both the user ID and the user key. 44 + """ 45 + 46 + 47 + class InvalidItemFieldsError(PyZoteroError): 48 + """Raised when an attempt is made to create/update items w/invalid fields.""" 49 + 50 + 51 + class ResourceNotFoundError(PyZoteroError): 52 + """Raised when a resource (item, collection etc.) 
could not be found.""" 53 + 54 + 55 + class HTTPError(PyZoteroError): 56 + """Raised for miscellaneous HTTP errors.""" 57 + 58 + 59 + class CouldNotReachURLError(PyZoteroError): 60 + """Raised when we can't reach a URL.""" 61 + 62 + 63 + class ConflictError(PyZoteroError): 64 + """409 - Raised when the target library is locked.""" 65 + 66 + 67 + class PreConditionFailedError(PyZoteroError): 68 + """412 - Raised when the provided X-Zotero-Write-Token has already been 69 + submitted. 70 + """ 71 + 72 + 73 + class RequestEntityTooLargeError(PyZoteroError): 74 + """413 - The upload would exceed the storage quota of the library owner.""" 75 + 76 + 77 + class PreConditionRequiredError(PyZoteroError): 78 + """428 - Raised when If-Match or If-None-Match was not provided.""" 79 + 80 + 81 + class TooManyRequestsError(PyZoteroError): 82 + """429 - Raised when there are too many unfinished uploads. 83 + Try again after the number of seconds specified in the Retry-After header. 84 + """ 85 + 86 + 87 + class FileDoesNotExistError(PyZoteroError): 88 + """Raised when a file path to be attached can't be opened (or doesn't exist).""" 89 + 90 + 91 + class TooManyRetriesError(PyZoteroError): 92 + """Raise after the backoff period for new requests exceeds 32s.""" 93 + 94 + 95 + class UploadError(PyZoteroError): 96 + """Raise if the connection drops during upload or some other non-HTTP error 97 + code is returned. 
98 + """ 99 + 100 + 101 + # Mapping of HTTP status codes to exception classes 102 + ERROR_CODES: dict[int, type[PyZoteroError]] = { 103 + 400: UnsupportedParamsError, 104 + 401: UserNotAuthorisedError, 105 + 403: UserNotAuthorisedError, 106 + 404: ResourceNotFoundError, 107 + 409: ConflictError, 108 + 412: PreConditionFailedError, 109 + 413: RequestEntityTooLargeError, 110 + 428: PreConditionRequiredError, 111 + 429: TooManyRequestsError, 112 + } 113 + 114 + 115 + def error_handler( 116 + zot: Any, req: httpx.Response, exc: BaseException | None = None 117 + ) -> None: 118 + """Error handler for HTTP requests. 119 + 120 + Raises appropriate exceptions based on HTTP status codes and handles 121 + rate limiting with backoff. 122 + 123 + Args: 124 + zot: A Zotero instance (or any object with _set_backoff method) 125 + req: The HTTP response object 126 + exc: Optional exception that triggered this handler 127 + 128 + """ 129 + 130 + def err_msg(req: httpx.Response) -> str: 131 + """Return a nicely-formatted error message.""" 132 + return ( 133 + f"\nCode: {req.status_code}\n" 134 + f"URL: {req.url!s}\n" 135 + f"Method: {req.request.method}\n" 136 + f"Response: {req.text}" 137 + ) 138 + 139 + if ERROR_CODES.get(req.status_code): 140 + # check to see whether its 429 141 + if req.status_code == httpx.codes.TOO_MANY_REQUESTS: 142 + # try to get backoff or delay duration 143 + delay = req.headers.get("backoff") or req.headers.get("retry-after") 144 + if not delay: 145 + msg = ( 146 + "You are being rate-limited and no backoff or retry duration " 147 + "has been received from the server. 
Try again later" 148 + ) 149 + raise TooManyRetriesError(msg) 150 + zot._set_backoff(delay) 151 + elif not exc: 152 + raise ERROR_CODES[req.status_code](err_msg(req)) 153 + else: 154 + raise ERROR_CODES[req.status_code](err_msg(req)) from exc 155 + elif not exc: 156 + raise HTTPError(err_msg(req)) 157 + else: 158 + raise HTTPError(err_msg(req)) from exc 159 + 160 + 161 + __all__ = [ 162 + "ERROR_CODES", 163 + "CallDoesNotExistError", 164 + "ConflictError", 165 + "CouldNotReachURLError", 166 + "FileDoesNotExistError", 167 + "HTTPError", 168 + "InvalidItemFieldsError", 169 + "MissingCredentialsError", 170 + "ParamNotPassedError", 171 + "PreConditionFailedError", 172 + "PreConditionRequiredError", 173 + "PyZoteroError", 174 + "RequestEntityTooLargeError", 175 + "ResourceNotFoundError", 176 + "TooManyItemsError", 177 + "TooManyRequestsError", 178 + "TooManyRetriesError", 179 + "UnsupportedParamsError", 180 + "UploadError", 181 + "UserNotAuthorisedError", 182 + "error_handler", 183 + ]
+2 -2
src/pyzotero/filetransport.py
··· 60 60 return not self.is_relative_url 61 61 62 62 63 - httpx.URL.is_relative_url = property(is_relative_url) # type: ignore 64 - httpx.URL.is_absolute_url = property(is_absolute_url) # type: ignore 63 + httpx.URL.is_relative_url = property(is_relative_url) 64 + httpx.URL.is_absolute_url = property(is_absolute_url) 65 65 66 66 67 67 class FileTransport(AsyncBaseTransport, BaseTransport):
+61 -2042
src/pyzotero/zotero.py
··· 1 - """Created by Stephan Hügel on 2011-02-28. 2 - 3 - This file is part of Pyzotero. 4 - """ 5 - 6 - __author__ = "Stephan Hügel" 7 - __api_version__ = "3" 8 - 9 - import copy 10 - import hashlib 11 - import io 12 - import json 13 - import mimetypes 14 - import re 15 - import threading 16 - import time 17 - import uuid 18 - import zipfile 19 - from collections import OrderedDict 20 - from functools import wraps 21 - from pathlib import Path, PurePosixPath 22 - from urllib.parse import ( 23 - parse_qs, 24 - parse_qsl, 25 - quote, 26 - unquote, 27 - urlencode, 28 - urlparse, 29 - urlunparse, 30 - ) 31 - 32 - import bibtexparser 33 - import feedparser 34 - import httpx 35 - import whenever 36 - from httpx import Request 37 - 38 - import pyzotero as pz 39 - 40 - from . import zotero_errors as ze 41 - from .filetransport import Client as File_Client 42 - 43 - # Avoid hanging the application if there's no server response 44 - timeout = 30 45 - 46 - ONE_HOUR = 3600 47 - DEFAULT_NUM_ITEMS = 50 48 - DEFAULT_ITEM_LIMIT = 100 49 - 50 - 51 - def build_url(base_url, path, args_dict=None): 52 - """Build a valid URL so we don't have to worry about string concatenation errors and 53 - leading / trailing slashes etc. 
54 - """ 55 - base_url = base_url.removesuffix("/") 56 - parsed = urlparse(base_url) 57 - new_path = str(PurePosixPath(parsed.path) / path.removeprefix("/")) 58 - if args_dict: 59 - return urlunparse(parsed._replace(path=new_path, query=urlencode(args_dict))) 60 - return urlunparse(parsed._replace(path=new_path)) 61 - 62 - 63 - def merge_params(url, params): 64 - """Strip query parameters, extracting them into a dict, then merging it with 65 - the "params" dict, returning the truncated url and merged query params dict 66 - """ 67 - parsed = urlparse(url) 68 - # Extract query parameters from URL 69 - incoming = parse_qs(parsed.query) 70 - incoming = {k: v[0] for k, v in incoming.items()} 71 - 72 - # Create new params dict by merging 73 - merged = {**incoming, **params} 74 - 75 - # Get base URL by zeroing out the query component 76 - base_url = urlunparse(parsed._replace(query="")) 77 - 78 - return base_url, merged 79 - 80 - 81 - def token(): 82 - """Return a unique 32-char write-token""" 83 - return str(uuid.uuid4().hex) 84 - 85 - 86 - def cleanwrap(func): 87 - """Wrap for Zotero._cleanup""" 88 - 89 - def enc(self, *args, **kwargs): 90 - """Send each item to _cleanup()""" 91 - return (func(self, item, **kwargs) for item in args) 92 - 93 - return enc 94 - 95 - 96 - def chunks(iterable, n): 97 - """Yield successive n-sized chunks from l.""" 98 - for i in range(0, len(iterable), n): 99 - yield iterable[i : i + n] 100 - 101 - 102 - def tcache(func): 103 - """Take care of the URL building and caching for template functions""" 104 - 105 - @wraps(func) 106 - def wrapped_f(self, *args, **kwargs): 107 - """Call the decorated function to get query string and params, 108 - builds URL, retrieves template, caches result, and returns template 109 - """ 110 - query_string, params = func(self, *args, **kwargs) 111 - params["timeout"] = timeout 112 - r = Request( 113 - "GET", 114 - build_url(self.endpoint, query_string), 115 - params=params, 116 - ) 117 - with httpx.Client() as 
client: 118 - response = client.send(r) 119 - 120 - # now split up the URL 121 - result = urlparse(str(response.url)) 122 - # construct cache key 123 - cachekey = f"{result.path}_{result.query}" 124 - if self.templates.get(cachekey) and not self._updated( 125 - query_string, 126 - self.templates[cachekey], 127 - cachekey, 128 - ): 129 - return self.templates[cachekey]["tmplt"] 130 - # otherwise perform a normal request and cache the response 131 - retrieved = self._retrieve_data(query_string, params=params) 132 - return self._cache(retrieved, cachekey) 133 - 134 - return wrapped_f 135 - 136 - 137 - def backoff_check(func): 138 - """Perform backoff processing 139 - func must return a Requests GET / POST / PUT / PATCH / DELETE etc 140 - This is is intercepted: we first check for an active backoff 141 - and wait if need be. 142 - After the response is received, we do normal error checking 143 - and set a new backoff if necessary, before returning 144 - 145 - Use with functions that are intended to return True 146 - """ 147 - 148 - @wraps(func) 149 - def wrapped_f(self, *args, **kwargs): 150 - self._check_backoff() 151 - # resp is a Requests response object 152 - resp = func(self, *args, **kwargs) 153 - try: 154 - resp.raise_for_status() 155 - except httpx.HTTPError as exc: 156 - error_handler(self, resp, exc) 157 - self.request = resp 158 - backoff = resp.headers.get("backoff") or resp.headers.get("retry-after") 159 - if backoff: 160 - self._set_backoff(backoff) 161 - 162 - return True 163 - 164 - return wrapped_f 165 - 166 - 167 - def retrieve(func): 168 - """Call _retrieve_data() and passes 169 - the result to the correct processor, based on a lookup 170 - """ 171 - 172 - @wraps(func) 173 - def wrapped_f(self, *args, **kwargs): 174 - """Return result of _retrieve_data() 175 - 176 - func's return value is part of a URI, and it's this 177 - which is intercepted and passed to _retrieve_data: 178 - '/users/123/items?key=abc123' 179 - """ 180 - if kwargs: 181 - 
self.add_parameters(**kwargs) 182 - retrieved = self._retrieve_data(func(self, *args)) 183 - # we now always have links in the header response 184 - self.links = self._extract_links() 185 - # determine content and format, based on url params 186 - content = ( 187 - self.content.search(str(self.request.url)) 188 - and self.content.search(str(self.request.url)).group(0) 189 - ) or "bib" 190 - # select format, or assume JSON 191 - content_type_header = self.request.headers["Content-Type"].lower() + ";" 192 - fmt = self.formats.get( 193 - # strip "; charset=..." segment 194 - content_type_header[0 : content_type_header.index(";")], 195 - "json", 196 - ) 197 - # clear all query parameters 198 - self.url_params = None 199 - # Zotero API returns plain-text attachments as zipped content 200 - # We can inspect the redirect header to check whether Zotero compressed the file 201 - if fmt == "zip": 202 - if ( 203 - self.request.history 204 - and self.request.history[0].headers.get("Zotero-File-Compressed") 205 - == "Yes" 206 - ): 207 - z = zipfile.ZipFile(io.BytesIO(retrieved.content)) 208 - namelist = z.namelist() 209 - file = z.read(namelist[0]) 210 - else: 211 - file = retrieved.content 212 - return file 213 - # check to see whether it's tag data 214 - if "tags" in str(self.request.url): 215 - self.tag_data = False 216 - return self._tags_data(retrieved.json()) 217 - if fmt == "atom": 218 - parsed = feedparser.parse(retrieved.text) 219 - # select the correct processor 220 - processor = self.processors.get(content) 221 - # process the content correctly with a custom rule 222 - return processor(parsed) 223 - if fmt == "snapshot": 224 - # we need to dump as a zip! 
225 - self.snapshot = True 226 - if fmt == "bibtex": 227 - parser = bibtexparser.bparser.BibTexParser( 228 - common_strings=True, 229 - ignore_nonstandard_types=False, 230 - ) 231 - return parser.parse(retrieved.text) 232 - # it's binary, so return raw content 233 - if fmt != "json": 234 - return retrieved.content 235 - # no need to do anything special, return JSON 236 - return retrieved.json() 237 - 238 - return wrapped_f 239 - 240 - 241 - def ss_wrap(func): 242 - """Ensure that a SavedSearch object exists""" 243 - 244 - def wrapper(self, *args, **kwargs): 245 - if not self.savedsearch: 246 - self.savedsearch = SavedSearch(self) 247 - return func(self, *args, **kwargs) 248 - 249 - return wrapper 250 - 251 - 252 - class Zotero: 253 - """Zotero API methods 254 - A full list of methods can be found here: 255 - http://www.zotero.org/support/dev/server_api 256 - """ 257 - 258 - def __init__( 259 - self, 260 - library_id=None, 261 - library_type=None, 262 - api_key=None, 263 - preserve_json_order=False, 264 - locale="en-US", 265 - local=False, 266 - ): 267 - self.client = None 268 - """Store Zotero credentials""" 269 - if not local: 270 - self.endpoint = "https://api.zotero.org" 271 - self.local = False 272 - else: 273 - self.endpoint = "http://localhost:23119/api" 274 - self.local = True 275 - if library_id is not None and library_type: 276 - self.library_id = library_id 277 - # library_type determines whether query begins w. 
/users or /groups 278 - self.library_type = library_type + "s" 279 - else: 280 - err = "Please provide both the library ID and the library type" 281 - raise ze.MissingCredentialsError(err) 282 - # api_key is not required for public individual or group libraries 283 - self.api_key = api_key 284 - self.preserve_json_order = preserve_json_order 285 - self.locale = locale 286 - self.url_params = None 287 - self.tag_data = False 288 - self.request = None 289 - self.snapshot = False 290 - self.client = httpx.Client( 291 - headers=self.default_headers(), 292 - follow_redirects=True, 293 - ) 294 - # these aren't valid item fields, so never send them to the server 295 - self.temp_keys = {"key", "etag", "group_id", "updated"} 296 - # determine which processor to use for the parsed content 297 - self.fmt = re.compile(r"(?<=format=)\w+") 298 - self.content = re.compile(r"(?<=content=)\w+") 299 - # JSON by default 300 - self.formats = { 301 - "application/atom+xml": "atom", 302 - "application/x-bibtex": "bibtex", 303 - "application/json": "json", 304 - "text/html": "snapshot", 305 - "text/plain": "plain", 306 - "text/markdown": "plain", 307 - "application/pdf; charset=utf-8": "pdf", 308 - "application/pdf": "pdf", 309 - "application/msword": "doc", 310 - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx", 311 - "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx", 312 - "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx", 313 - "application/zip": "zip", 314 - "application/epub+zip": "zip", 315 - "audio/mpeg": "mp3", 316 - "video/mp4": "mp4", 317 - "audio/x-wav": "wav", 318 - "video/x-msvideo": "avi", 319 - "application/octet-stream": "octet", 320 - "application/x-tex": "tex", 321 - "application/x-texinfo": "texinfo", 322 - "image/jpeg": "jpeg", 323 - "image/png": "png", 324 - "image/gif": "gif", 325 - "image/tiff": "tiff", 326 - "application/postscript": "postscript", 327 - "application/rtf": 
"rtf", 328 - } 329 - self.processors = { 330 - "bib": self._bib_processor, 331 - "citation": self._citation_processor, 332 - "bibtex": self._bib_processor, 333 - "bookmarks": self._bib_processor, 334 - "coins": self._bib_processor, 335 - "csljson": self._csljson_processor, 336 - "mods": self._bib_processor, 337 - "refer": self._bib_processor, 338 - "rdf_bibliontology": self._bib_processor, 339 - "rdf_dc": self._bib_processor, 340 - "rdf_zotero": self._bib_processor, 341 - "ris": self._bib_processor, 342 - "tei": self._bib_processor, 343 - "wikipedia": self._bib_processor, 344 - "json": self._json_processor, 345 - "html": self._bib_processor, 346 - } 347 - self.links = None 348 - self.self_link = {} 349 - self.templates = {} 350 - self.savedsearch = None 351 - # these are required for backoff handling 352 - self.backoff = False 353 - self.backoff_duration = 0.0 354 - 355 - def __del__(self): 356 - """Remove client before cleanup""" 357 - # this isn't guaranteed to run, but that's OK 358 - if c := self.client: 359 - c.close() 360 - 361 - @property 362 - def __version__(self): 363 - """Return the version of the pyzotero library""" 364 - return pz.__version__ 365 - 366 - def _check_for_component(self, url, component): 367 - """Check a url path query fragment for a specific query parameter""" 368 - return bool(parse_qs(url).get(component)) 369 - 370 - def _striplocal(self, url): 371 - """We need to remve the leading "/api" substring from urls if we're running in local mode""" 372 - if self.local: 373 - parsed = urlparse(url) 374 - purepath = PurePosixPath(unquote(parsed.path)) 375 - newpath = "/".join(purepath.parts[2:]) 376 - replaced = parsed._replace(path="/" + newpath) 377 - return urlunparse(replaced) 378 - return url 379 - 380 - def _set_backoff(self, duration): 381 - """Set a backoff 382 - Spins up a timer in a background thread which resets the backoff logic 383 - when it expires, then sets the time at which the backoff will expire. 
384 - The latter step is required so that other calls can check whether there's 385 - an active backoff, because the threading.Timer method has no way 386 - of returning a duration 387 - """ 388 - duration = float(duration) 389 - self.backoff = True 390 - threading.Timer(duration, self._reset_backoff).start() 391 - self.backoff_duration = time.time() + duration 392 - 393 - def _reset_backoff(self): 394 - self.backoff = False 395 - self.backoff_duration = 0.0 396 - 397 - def _check_backoff(self): 398 - """Before an API call is made, we check whether there's an active backoff. 399 - If there is, we check whether there's any time left on the backoff. 400 - If there is, we sleep for the remainder before returning 401 - """ 402 - if self.backoff: 403 - remainder = self.backoff_duration - time.time() 404 - if remainder > 0.0: 405 - time.sleep(remainder) 406 - 407 - def default_headers(self): 408 - """It's always OK to include these headers""" 409 - _headers = { 410 - "User-Agent": f"Pyzotero/{pz.__version__}", 411 - "Zotero-API-Version": f"{__api_version__}", 412 - } 413 - if self.api_key: 414 - _headers["Authorization"] = f"Bearer {self.api_key}" 415 - return _headers 416 - 417 - def _cache(self, response, key): 418 - """Add a retrieved template to the cache for 304 checking 419 - accepts a dict and key name, adds the retrieval time, and adds both 420 - to self.templates as a new dict using the specified key 421 - """ 422 - # cache template and retrieval time for subsequent calls 423 - try: 424 - thetime = whenever.ZonedDateTime.now("Europe/London").py_datetime() 425 - except AttributeError: 426 - thetime = whenever.ZonedDateTime.now("Europe/London").py_datetime() 427 - self.templates[key] = {"tmplt": response.json(), "updated": thetime} 428 - return copy.deepcopy(response.json()) 429 - 430 - @cleanwrap 431 - def _cleanup(self, to_clean, allow=()): 432 - """Remove keys we added for internal use""" 433 - # this item's been retrieved from the API, we only need the 'data' 
434 - # entry 435 - if to_clean.keys() == ["links", "library", "version", "meta", "key", "data"]: 436 - to_clean = to_clean["data"] 437 - return dict( 438 - [ 439 - [k, v] 440 - for k, v in list(to_clean.items()) 441 - if (k in allow or k not in self.temp_keys) 442 - ], 443 - ) 444 - 445 - def _retrieve_data(self, request=None, params=None): 446 - """Retrieve Zotero items via the API 447 - Combine endpoint and request to access the specific resource 448 - Returns a JSON document 449 - """ 450 - full_url = build_url(self.endpoint, request) 451 - # ensure that we wait if there's an active backoff 452 - self._check_backoff() 453 - # don't set locale if the url already contains it 454 - # we always add a locale if it's a "standalone" or first call 455 - needs_locale = not self.links or not self._check_for_component( 456 - self.links.get("next"), 457 - "locale", 458 - ) 459 - if needs_locale: 460 - if params: 461 - params["locale"] = self.locale 462 - else: 463 - params = {"locale": self.locale} 464 - # we now have to merge self.url_params (default params, and those supplied by the user) 465 - if not params: 466 - params = {} 467 - if not self.url_params: 468 - self.url_params = {} 469 - merged_params = {**self.url_params, **params} 470 - # our incoming url might be from the "links" dict, in which case it will contain url parameters. 
471 - # Unfortunately, httpx doesn't like to merge query parameters in the url string and passed params 472 - # so we strip the url params, combining them with our existing url_params 473 - final_url, final_params = merge_params(full_url, merged_params) 474 - # file URI errors are raised immediately so we have to try here 475 - try: 476 - self.request = self.client.get( 477 - url=final_url, 478 - params=final_params, 479 - headers=self.default_headers(), 480 - timeout=timeout, 481 - ) 482 - self.request.encoding = "utf-8" 483 - # The API doesn't return this any more, so we have to cheat 484 - self.self_link = self.request.url 485 - except httpx.UnsupportedProtocol: 486 - # File URI handler logic 487 - fc = File_Client() 488 - request = fc.get( 489 - url=final_url, 490 - params=final_params, 491 - headers=self.default_headers(), 492 - timeout=timeout, 493 - follow_redirects=True, 494 - ) 495 - self.request = request 496 - # since we'll be writing bytes, we need to set this to a type that will trigger the bytes processor 497 - self.request.headers["Content-Type"] = "text/plain" 498 - try: 499 - self.request.raise_for_status() 500 - except httpx.HTTPError as exc: 501 - error_handler(self, self.request, exc) 502 - backoff = self.request.headers.get("backoff") or self.request.headers.get( 503 - "retry-after", 504 - ) 505 - if backoff: 506 - self._set_backoff(backoff) 507 - return self.request 508 - 509 - def _extract_links(self): 510 - """Extract self, first, next, last links from a request response""" 511 - extracted = {} 512 - try: 513 - for key, value in self.request.links.items(): 514 - parsed = urlparse(value["url"]) 515 - fragment = urlunparse(("", "", parsed.path, "", parsed.query, "")) 516 - extracted[key] = fragment 517 - # add a 'self' link 518 - parsed = urlparse(str(self.self_link)) 519 - # strip 'format' query parameter and rebuild query string 520 - query_params = [(k, v) for k, v in parse_qsl(parsed.query) if k != "format"] 521 - # rebuild url fragment 
with just path and query (consistent with other links) 522 - extracted["self"] = urlunparse( 523 - ("", "", parsed.path, "", urlencode(query_params), "") 524 - ) 525 - except KeyError: 526 - # No links present, because it's a single item 527 - return None 528 - else: 529 - return extracted 530 - 531 - def _updated(self, url, payload, template=None): 532 - """Call to see if a template request returns 304 533 - accepts: 534 - - a string to combine with the API endpoint 535 - - a dict of format values, in case they're required by 'url' 536 - - a template name to check for 537 - As per the API docs, a template less than 1 hour old is 538 - assumed to be fresh, and will immediately return False if found 539 - """ 540 - # If the template is more than an hour old, try a 304 541 - if ( 542 - abs( 543 - whenever.ZonedDateTime.now("Europe/London").py_datetime() 544 - - self.templates[template]["updated"], 545 - ).seconds 546 - > ONE_HOUR 547 - ): 548 - query = build_url( 549 - self.endpoint, 550 - url.format(u=self.library_id, t=self.library_type, **payload), 551 - ) 552 - headers = { 553 - "If-Modified-Since": payload["updated"].strftime( 554 - "%a, %d %b %Y %H:%M:%S %Z", 555 - ), 556 - } 557 - # perform the request, and check whether the response returns 304 558 - self._check_backoff() 559 - req = self.client.get(query, headers=headers) 560 - try: 561 - req.raise_for_status() 562 - except httpx.HTTPError as exc: 563 - error_handler(self, req, exc) 564 - backoff = self.request.headers.get("backoff") or self.request.headers.get( 565 - "retry-after", 566 - ) 567 - if backoff: 568 - self._set_backoff(backoff) 569 - return req.status_code == httpx.codes.NOT_MODIFIED 570 - # Still plenty of life left in't 571 - return False 572 - 573 - def add_parameters(self, **params): 574 - """Add URL parameters. 
575 - 576 - Also ensure that only valid format/content combinations are requested 577 - """ 578 - # Preserve constructor-level parameters (like locale) while allowing method-level overrides 579 - if self.url_params is None: 580 - self.url_params = {} 581 - 582 - # Store existing params to preserve things like locale 583 - preserved_params = self.url_params.copy() 584 - 585 - # we want JSON by default 586 - if not params.get("format"): 587 - params["format"] = "json" 588 - # non-standard content must be retrieved as Atom 589 - if params.get("content"): 590 - params["format"] = "atom" 591 - # TODO: rewrite format=atom, content=json request 592 - if "limit" not in params or params.get("limit") == 0: 593 - params["limit"] = DEFAULT_ITEM_LIMIT 594 - # Need ability to request arbitrary number of results for version 595 - # response 596 - # -1 value is hack that works with current version 597 - elif params["limit"] == -1 or params["limit"] is None: 598 - del params["limit"] 599 - # bib format can't have a limit 600 - if params.get("format") == "bib": 601 - params.pop("limit", None) 602 - 603 - # Merge preserved params with new params (new params override existing ones) 604 - self.url_params = {**preserved_params, **params} 605 - 606 - def _build_query(self, query_string, no_params=False): 607 - """Set request parameters. 
Will always add the user ID if it hasn't 608 - been specifically set by an API method 609 - """ 610 - try: 611 - query = quote(query_string.format(u=self.library_id, t=self.library_type)) 612 - except KeyError as err: 613 - errmsg = f"There's a request parameter missing: {err}" 614 - raise ze.ParamNotPassedError(errmsg) from None 615 - # Add the URL parameters and the user key, if necessary 616 - if no_params is False and not self.url_params: 617 - self.add_parameters() 618 - return query 619 - 620 - @retrieve 621 - def publications(self): 622 - """Return the contents of My Publications.""" 623 - if self.library_type != "users": 624 - msg = "This API call does not exist for group libraries" 625 - raise ze.CallDoesNotExistError( 626 - msg, 627 - ) 628 - query_string = "/{t}/{u}/publications/items" 629 - return self._build_query(query_string) 630 - 631 - # The following methods are Zotero Read API calls 632 - def num_items(self): 633 - """Return the total number of top-level items in the library""" 634 - query = "/{t}/{u}/items/top" 635 - return self._totals(query) 636 - 637 - def count_items(self): 638 - """Return the count of all items in a group / library""" 639 - query = "/{t}/{u}/items" 640 - return self._totals(query) 641 - 642 - def num_collectionitems(self, collection): 643 - """Return the total number of items in the specified collection""" 644 - query = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/items" 645 - return self._totals(query) 646 - 647 - def _totals(self, query): 648 - """General method for returning total counts""" 649 - self.add_parameters(limit=1) 650 - query = self._build_query(query) 651 - self._retrieve_data(query) 652 - self.url_params = None 653 - # extract the 'total items' figure 654 - return int(self.request.headers["Total-Results"]) 655 - 656 - @retrieve 657 - def key_info(self, **kwargs): 658 - """Retrieve info about the permissions associated with the 659 - key associated to the given Zotero instance 660 
- """ 661 - query_string = f"/keys/{self.api_key}" 662 - return self._build_query(query_string) 663 - 664 - @retrieve 665 - def items(self, **kwargs): 666 - """Get user items""" 667 - query_string = "/{t}/{u}/items" 668 - return self._build_query(query_string) 669 - 670 - @retrieve 671 - def settings(self, **kwargs): 672 - """Get synced user settings""" 673 - query_string = "/{t}/{u}/settings" 674 - return self._build_query(query_string) 675 - 676 - @retrieve 677 - def fulltext_item(self, itemkey, **kwargs): 678 - """Get full-text content for an item""" 679 - query_string = ( 680 - f"/{self.library_type}/{self.library_id}/items/{itemkey}/fulltext" 681 - ) 682 - return self._build_query(query_string) 683 - 684 - @backoff_check 685 - def set_fulltext(self, itemkey, payload): 686 - """Set full-text data for an item 687 - <itemkey> should correspond to an existing attachment item. 688 - payload should be a dict containing three keys: 689 - 'content': the full-text content and either 690 - For text documents, 'indexedChars' and 'totalChars' OR 691 - For PDFs, 'indexedPages' and 'totalPages'. 
692 - """ 693 - headers = {} 694 - headers.update({"Content-Type": "application/json"}) 695 - return self.client.put( 696 - url=build_url( 697 - self.endpoint, 698 - f"/{self.library_type}/{self.library_id}/items/{itemkey}/fulltext", 699 - ), 700 - headers=headers, 701 - json=payload, 702 - ) 703 - 704 - def new_fulltext(self, since): 705 - """Retrieve list of full-text content items and versions which are newer 706 - than <since> 707 - """ 708 - query_string = f"/{self.library_type}/{self.library_id}/fulltext" 709 - headers = {} 710 - params = {"since": since} 711 - self._check_backoff() 712 - resp = self.client.get( 713 - build_url(self.endpoint, query_string), 714 - params=params, 715 - headers=headers, 716 - ) 717 - try: 718 - resp.raise_for_status() 719 - except httpx.HTTPError as exc: 720 - error_handler(self, resp, exc) 721 - backoff = self.request.headers.get("backoff") or self.request.headers.get( 722 - "retry-after", 723 - ) 724 - if backoff: 725 - self._set_backoff(backoff) 726 - return resp.json() 727 - 728 - def item_versions(self, **kwargs): 729 - """Return dict associating items keys (all no limit by default) to versions. 730 - Accepts a since= parameter in kwargs to limit the data to those updated since since= 731 - """ 732 - if "limit" not in kwargs: 733 - kwargs["limit"] = None 734 - kwargs["format"] = "versions" 735 - return self.items(**kwargs) 736 - 737 - def collection_versions(self, **kwargs): 738 - """Return dict associating collection keys (all no limit by default) to versions. 
739 - Accepts a since= parameter in kwargs to limit the data to those updated since since= 740 - """ 741 - if "limit" not in kwargs: 742 - kwargs["limit"] = None 743 - kwargs["format"] = "versions" 744 - return self.collections(**kwargs) 745 - 746 - def last_modified_version(self, **kwargs): 747 - """Get the last modified user or group library version""" 748 - # This MUST be a multiple-object request, limit param notwithstanding 749 - self.items(limit=1) 750 - lmv = self.request.headers.get("last-modified-version", 0) 751 - return int(lmv) 752 - 753 - @retrieve 754 - def top(self, **kwargs): 755 - """Get user top-level items""" 756 - query_string = "/{t}/{u}/items/top" 757 - return self._build_query(query_string) 758 - 759 - @retrieve 760 - def trash(self, **kwargs): 761 - """Get all items in the trash""" 762 - query_string = "/{t}/{u}/items/trash" 763 - return self._build_query(query_string) 764 - 765 - @retrieve 766 - def searches(self, **kwargs): 767 - """Get saved searches""" 768 - query_string = "/{t}/{u}/searches" 769 - return self._build_query(query_string) 770 - 771 - @retrieve 772 - def deleted(self, **kwargs): 773 - """Get all deleted items (requires since= parameter)""" 774 - if "limit" not in kwargs: 775 - # Currently deleted API doesn't respect limit leaving it out by 776 - # default preserves compat 777 - kwargs["limit"] = None 778 - query_string = "/{t}/{u}/deleted" 779 - return self._build_query(query_string) 780 - 781 - @retrieve 782 - def item(self, item, **kwargs): 783 - """Get a specific item""" 784 - query_string = f"/{self.library_type}/{self.library_id}/items/{item.upper()}" 785 - return self._build_query(query_string) 786 - 787 - @retrieve 788 - def file(self, item, **kwargs): 789 - """Get the file from a specific item""" 790 - query_string = ( 791 - f"/{self.library_type}/{self.library_id}/items/{item.upper()}/file" 792 - ) 793 - return self._build_query(query_string, no_params=True) 1 + """Backwards-compatible re-exports for 
pyzotero.zotero module. 794 2 795 - def dump(self, itemkey, filename=None, path=None): 796 - """Dump a file attachment to disk, with optional filename and path""" 797 - if not filename: 798 - filename = self.item(itemkey)["data"]["filename"] 799 - pth = Path(path) / filename if path else Path(filename) 800 - file = self.file(itemkey) 801 - if self.snapshot: 802 - self.snapshot = False 803 - pth = pth.parent / (pth.name + ".zip") 804 - with pth.open("wb") as f: 805 - f.write(file) 3 + This module maintains backwards compatibility for code that imports from 4 + pyzotero.zotero. New code should import directly from pyzotero. 806 5 807 - @retrieve 808 - def children(self, item, **kwargs): 809 - """Get a specific item's child items""" 810 - query_string = ( 811 - f"/{self.library_type}/{self.library_id}/items/{item.upper()}/children" 812 - ) 813 - return self._build_query(query_string) 814 - 815 - @retrieve 816 - def collection_items(self, collection, **kwargs): 817 - """Get a specific collection's items""" 818 - query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/items" 819 - return self._build_query(query_string) 820 - 821 - @retrieve 822 - def collection_items_top(self, collection, **kwargs): 823 - """Get a specific collection's top-level items""" 824 - query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/items/top" 825 - return self._build_query(query_string) 6 + Example: 7 + # Old style (still works) 8 + from pyzotero.zotero import Zotero 826 9 827 - @retrieve 828 - def collection_tags(self, collection, **kwargs): 829 - """Get a specific collection's tags""" 830 - query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/tags" 831 - return self._build_query(query_string) 10 + # New style (preferred) 11 + from pyzotero import Zotero 832 12 833 - @retrieve 834 - def collection(self, collection, **kwargs): 835 - """Get user collection""" 836 - query_string = ( 837 - 
f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}" 838 - ) 839 - return self._build_query(query_string) 13 + """ 840 14 841 - @retrieve 842 - def collections(self, **kwargs): 843 - """Get user collections""" 844 - query_string = "/{t}/{u}/collections" 845 - return self._build_query(query_string) 15 + # Re-export everything for backwards compatibility 16 + # Also import the errors module for backwards compat 17 + from pyzotero import zotero_errors as ze 18 + from pyzotero._client import Zotero 19 + from pyzotero._decorators import backoff_check, cleanwrap, retrieve, ss_wrap, tcache 20 + from pyzotero._search import SavedSearch 21 + from pyzotero._upload import Zupload 22 + from pyzotero._utils import ( 23 + DEFAULT_ITEM_LIMIT, 24 + DEFAULT_NUM_ITEMS, 25 + DEFAULT_TIMEOUT, 26 + ONE_HOUR, 27 + build_url, 28 + chunks, 29 + merge_params, 30 + token, 31 + ) 32 + from pyzotero.errors import error_handler 846 33 847 - def all_collections(self, collid=None): 848 - """Retrieve all collections and subcollections. Works for top-level collections 849 - or for a specific collection. Works at all collection depths. 
850 - """ 851 - all_collections = [] 34 + # Preserve original module-level attributes 35 + __author__ = "Stephan Hügel" 36 + __api_version__ = "3" 852 37 853 - def subcoll(clct): 854 - """Recursively add collections to a flat master list""" 855 - all_collections.append(clct) 856 - if clct["meta"].get("numCollections", 0) > 0: 857 - # add collection to master list & recur with all child 858 - # collections 859 - [ 860 - subcoll(c) 861 - for c in self.everything(self.collections_sub(clct["data"]["key"])) 862 - ] 38 + # Backwards compatibility: the old 'timeout' variable name 39 + timeout = DEFAULT_TIMEOUT 863 40 864 - # select all top-level collections or a specific collection and 865 - # children 866 - if collid: 867 - toplevel = [self.collection(collid)] 868 - else: 869 - toplevel = self.everything(self.collections_top()) 870 - [subcoll(collection) for collection in toplevel] 871 - return all_collections 872 - 873 - @retrieve 874 - def collections_top(self, **kwargs): 875 - """Get top-level user collections""" 876 - query_string = "/{t}/{u}/collections/top" 877 - return self._build_query(query_string) 878 - 879 - @retrieve 880 - def collections_sub(self, collection, **kwargs): 881 - """Get subcollections for a specific collection""" 882 - query_string = f"/{self.library_type}/{self.library_id}/collections/{collection.upper()}/collections" 883 - return self._build_query(query_string) 884 - 885 - @retrieve 886 - def groups(self, **kwargs): 887 - """Get user groups""" 888 - query_string = "/users/{u}/groups" 889 - return self._build_query(query_string) 890 - 891 - @retrieve 892 - def tags(self, **kwargs): 893 - """Get tags""" 894 - query_string = "/{t}/{u}/tags" 895 - self.tag_data = True 896 - return self._build_query(query_string) 897 - 898 - @retrieve 899 - def item_tags(self, item, **kwargs): 900 - """Get tags for a specific item""" 901 - query_string = ( 902 - f"/{self.library_type}/{self.library_id}/items/{item.upper()}/tags" 903 - ) 904 - self.tag_data = True 
905 - return self._build_query(query_string) 906 - 907 - def all_top(self, **kwargs): 908 - """Retrieve all top-level items""" 909 - return self.everything(self.top(**kwargs)) 910 - 911 - @retrieve 912 - def follow(self): 913 - """Return the result of the call to the URL in the 'Next' link""" 914 - if n := self.links.get("next"): 915 - return self._striplocal(n) 916 - return None 917 - 918 - def iterfollow(self): 919 - """Return generator for self.follow()""" 920 - # use same criterion as self.follow() 921 - while True: 922 - if self.links.get("next"): 923 - yield self.follow() 924 - else: 925 - return 926 - 927 - def makeiter(self, func): 928 - """Return a generator of func's results""" 929 - if self.links is None or "self" not in self.links: 930 - msg = "makeiter() requires a previous API call with pagination links" 931 - raise RuntimeError(msg) 932 - # reset the link. This results in an extra API call, yes 933 - self.links["next"] = self.links["self"] 934 - return self.iterfollow() 935 - 936 - def everything(self, query): 937 - """Retrieve all items in the library for a particular query 938 - This method will override the 'limit' parameter if it's been set 939 - """ 940 - try: 941 - items = [] 942 - items.extend(query) 943 - while self.links.get("next"): 944 - items.extend(self.follow()) 945 - except TypeError: 946 - # we have a bibliography object ughh 947 - items = copy.deepcopy(query) 948 - while self.links.get("next"): 949 - items.entries.extend(self.follow().entries) 950 - return items 951 - 952 - def get_subset(self, subset): 953 - """Retrieve a subset of items 954 - Accepts a single argument: a list of item IDs 955 - """ 956 - if len(subset) > DEFAULT_NUM_ITEMS: 957 - err = f"You may only retrieve {DEFAULT_NUM_ITEMS} items per call" 958 - raise ze.TooManyItemsError(err) 959 - # remember any url parameters that have been set 960 - params = self.url_params 961 - retr = [] 962 - for itm in subset: 963 - retr.append(self.item(itm)) 964 - self.url_params = 
params 965 - # clean up URL params when we're finished 966 - self.url_params = None 967 - return retr 968 - 969 - # The following methods process data returned by Read API calls 970 - def _json_processor(self, retrieved): 971 - """Format and return data from API calls which return Items""" 972 - json_kwargs = {} 973 - if self.preserve_json_order: 974 - json_kwargs["object_pairs_hook"] = OrderedDict 975 - # send entries to _tags_data if there's no JSON 976 - try: 977 - items = [ 978 - json.loads(e["content"][0]["value"], **json_kwargs) 979 - for e in retrieved.entries 980 - ] 981 - except KeyError: 982 - return self._tags_data(retrieved) 983 - return items 984 - 985 - def _csljson_processor(self, retrieved): 986 - """Return a list of dicts which are dumped CSL JSON""" 987 - items = [] 988 - json_kwargs = {} 989 - if self.preserve_json_order: 990 - json_kwargs["object_pairs_hook"] = OrderedDict 991 - items = [ 992 - json.loads(entry["content"][0]["value"], **json_kwargs) 993 - for entry in retrieved.entries 994 - ] 995 - self.url_params = None 996 - return items 997 - 998 - def _bib_processor(self, retrieved): 999 - """Return a list of strings formatted as HTML bibliography entries""" 1000 - items = [bib["content"][0]["value"] for bib in retrieved.entries] 1001 - self.url_params = None 1002 - return items 1003 - 1004 - def _citation_processor(self, retrieved): 1005 - """Return a list of strings formatted as HTML citation entries""" 1006 - items = [cit["content"][0]["value"] for cit in retrieved.entries] 1007 - self.url_params = None 1008 - return items 1009 - 1010 - def _tags_data(self, retrieved): 1011 - """Format and return data from API calls which return Tags""" 1012 - self.url_params = None 1013 - return [t["tag"] for t in retrieved] 1014 - 1015 - # The following methods are Write API calls 1016 - def item_template(self, itemtype, linkmode=None): 1017 - """Get a template for a new item""" 1018 - # if we have a template and it hasn't been updated since we stored 
it 1019 - template_name = f"item_template_{itemtype}_{linkmode or ''}" 1020 - params = {"itemType": itemtype} 1021 - # Set linkMode parameter for API request if itemtype is attachment 1022 - if itemtype == "attachment": 1023 - params["linkMode"] = linkmode 1024 - self.add_parameters(**params) 1025 - query_string = "/items/new" 1026 - if self.templates.get(template_name) and not self._updated( 1027 - query_string, 1028 - self.templates[template_name], 1029 - template_name, 1030 - ): 1031 - return copy.deepcopy(self.templates[template_name]["tmplt"]) 1032 - # otherwise perform a normal request and cache the response 1033 - retrieved = self._retrieve_data(query_string) 1034 - return self._cache(retrieved, template_name) 1035 - 1036 - def _attachment_template(self, attachment_type): 1037 - """Return a new attachment template of the required type: 1038 - imported_file 1039 - imported_url 1040 - linked_file 1041 - linked_url 1042 - """ 1043 - return self.item_template("attachment", linkmode=attachment_type) 1044 - 1045 - def _attachment(self, payload, parentid=None): 1046 - """Create attachments 1047 - accepts a list of one or more attachment template dicts 1048 - and an optional parent Item ID. 
If this is specified, 1049 - attachments are created under this ID 1050 - """ 1051 - attachment = Zupload(self, payload, parentid) 1052 - return attachment.upload() 1053 - 1054 - @ss_wrap 1055 - def show_operators(self): 1056 - """Show available saved search operators""" 1057 - return self.savedsearch.operators 1058 - 1059 - @ss_wrap 1060 - def show_conditions(self): 1061 - """Show available saved search conditions""" 1062 - return self.savedsearch.conditions_operators.keys() 1063 - 1064 - @ss_wrap 1065 - def show_condition_operators(self, condition): 1066 - """Show available operators for a given saved search condition""" 1067 - # dict keys of allowed operators for the current condition 1068 - permitted_operators = self.savedsearch.conditions_operators.get(condition) 1069 - # transform these into values 1070 - return {self.savedsearch.operators.get(op) for op in permitted_operators} 1071 - 1072 - @ss_wrap 1073 - def saved_search(self, name, conditions): 1074 - """Create a saved search. 
@ss_wrap
def saved_search(self, name, conditions):
    """Create a new saved search.

    NOTE(review): the decorator/def line fell outside the visible hunk;
    signature reconstructed from the method body and the public API docs
    (saved_search(name, conditions)) - confirm against upstream.

    conditions is a list of dicts
    containing search conditions and must contain the following str keys:
    condition, operator, value
    """
    self.savedsearch._validate(conditions)
    body = [{"name": name, "conditions": conditions}]
    write_headers = {"Zotero-Write-Token": token()}
    self._check_backoff()
    resp = self.client.post(
        url=build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/searches",
        ),
        headers=write_headers,
        json=body,
    )
    self.request = resp
    try:
        resp.raise_for_status()
    except httpx.HTTPError as exc:
        error_handler(self, resp, exc)
    # honour any server-requested backoff before the next write
    delay = resp.headers.get("backoff") or resp.headers.get("retry-after")
    if delay:
        self._set_backoff(delay)
    return resp.json()

@ss_wrap
def delete_saved_search(self, keys):
    """Delete one or more saved searches by passing a list of one or more
    unique search keys
    """
    write_headers = {"Zotero-Write-Token": token()}
    self._check_backoff()
    resp = self.client.delete(
        url=build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/searches",
        ),
        headers=write_headers,
        params={"searchKey": ",".join(keys)},
    )
    self.request = resp
    try:
        resp.raise_for_status()
    except httpx.HTTPError as exc:
        error_handler(self, resp, exc)
    delay = resp.headers.get("backoff") or resp.headers.get("retry-after")
    if delay:
        self._set_backoff(delay)
    return resp.status_code

def upload_attachments(self, attachments, parentid=None, basedir=None):
    """Upload files to the already created (but never uploaded) attachments"""
    return Zupload(self, attachments, parentid, basedir=basedir).upload()

def add_tags(self, item, *tags):
    """Add one or more tags to a retrieved item,
    then update it on the server
    Accepts a dict, and one or more tags to add to it
    Returns the updated item from the server
    """
    # ensure a 'tags' list exists before appending
    if not item.get("data", {}).get("tags"):
        item["data"]["tags"] = []
    item["data"]["tags"].extend({"tag": f"{tag}"} for tag in tags)
    # validate the modified item before writing it back
    self.check_items([item])
    return self.update_item(item)

def check_items(self, items):
    """Check that items to be created contain no invalid dict keys
    Accepts a single argument: a list of one or more dicts
    The retrieved fields are cached and re-used until a 304 call fails
    """
    params = {"locale": self.locale, "timeout": timeout}
    query_string = "/itemFields"
    field_req = Request(
        "GET",
        build_url(self.endpoint, query_string),
        params=params,
    )
    with httpx.Client() as client:
        field_resp = client.send(field_req)
    # derive a cache key from the final URL's path and query
    parsed = urlparse(str(field_resp.url))
    cachekey = parsed.path + "_" + parsed.query
    if self.templates.get(cachekey) and not self._updated(
        query_string,
        self.templates[cachekey],
        cachekey,
    ):
        template = {t["field"] for t in self.templates[cachekey]["tmplt"]}
    else:
        template = {t["field"] for t in self.item_fields()}
    # add fields we know to be OK
    template |= {
        "path",
        "tags",
        "notes",
        "itemType",
        "creators",
        "mimeType",
        "linkMode",
        "note",
        "charset",
        "dateAdded",
        "version",
        "collections",
        "dateModified",
        "relations",
        # attachment items
        "parentItem",
        "mtime",
        "contentType",
        "md5",
        "filename",
        "inPublications",
        # annotation fields
        "annotationText",
        "annotationColor",
        "annotationType",
        "annotationPageLabel",
        "annotationPosition",
        "annotationSortIndex",
        "annotationComment",
        "annotationAuthorName",
    }
    template |= set(self.temp_keys)
    processed_items = []
    for pos, item in enumerate(items):
        # full API objects carry their payload under 'data'
        if set(item) == {"links", "library", "version", "meta", "key", "data"}:
            itm = item["data"]
        else:
            itm = item
        difference = set(itm.keys()).difference(template)
        if difference:
            err = f"Invalid keys present in item {pos + 1}: {' '.join(i for i in difference)}"
            raise ze.InvalidItemFieldsError(
                err,
            )
        processed_items.append(itm)
    return processed_items

@tcache
def item_types(self):
    """Get all available item types"""
    # Check for a valid cached version
    return "/itemTypes", {"locale": self.locale}

@tcache
def creator_fields(self):
    """Get localised creator fields"""
    # Check for a valid cached version
    return "/creatorFields", {"locale": self.locale}

@tcache
def item_type_fields(self, itemtype):
    """Get all valid fields for an item"""
    return "/itemTypeFields", {"itemType": itemtype, "locale": self.locale}

@tcache
def item_creator_types(self, itemtype):
    """Get all available creator types for an item"""
    return "/itemTypeCreatorTypes", {"itemType": itemtype, "locale": self.locale}

@tcache
def item_fields(self):
    """Get all available item fields"""
    # Check for a valid cached version
    return "/itemFields", {"locale": self.locale}
def item_attachment_link_modes(self):
    """Get all available link mode types.

    Note: No viable REST API route was found for this, so I tested and built
    a list from documentation found here -
    https://www.zotero.org/support/dev/web_api/json
    """
    # Fix: the original definition was missing `self`, so calling it as an
    # instance method (zot.item_attachment_link_modes()) raised TypeError.
    return ["imported_file", "imported_url", "linked_file", "linked_url"]

def create_items(self, payload, parentid=None, last_modified=None):
    """Create new Zotero items
    Accepts two arguments:
        a list containing one or more item dicts
        an optional parent item ID.
    Note that this can also be used to update existing items
    """
    if len(payload) > DEFAULT_NUM_ITEMS:
        msg = f"You may only create up to {DEFAULT_NUM_ITEMS} items per call"
        raise ze.TooManyItemsError(msg)
    # TODO: strip extra data if it's an existing item
    headers = {"Zotero-Write-Token": token(), "Content-Type": "application/json"}
    if last_modified is not None:
        headers["If-Unmodified-Since-Version"] = str(last_modified)
    # Fix: ("key") is just the string "key"; a one-element tuple needs the
    # trailing comma, otherwise `allow` membership tests use substring
    # semantics instead of exact field-name matching.
    to_send = list(self._cleanup(*payload, allow=("key",)))
    self._check_backoff()
    req = self.client.post(
        url=build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/items",
        ),
        content=json.dumps(to_send),
        headers=dict(headers),
    )
    self.request = req
    try:
        req.raise_for_status()
    except httpx.HTTPError as exc:
        error_handler(self, req, exc)
    resp = req.json()
    backoff = self.request.headers.get("backoff") or self.request.headers.get(
        "retry-after",
    )
    if backoff:
        self._set_backoff(backoff)
    if parentid:
        # we need to create child items using PATCH
        # TODO: handle possibility of item creation + failed parent attachment
        uheaders = {
            "If-Unmodified-Since-Version": req.headers["last-modified-version"],
        }
        for value in resp["success"].values():
            # Fix: the original re-bound the `payload` parameter here;
            # a distinct name avoids shadowing the argument.
            child_payload = {"parentItem": parentid}
            self._check_backoff()
            presp = self.client.patch(
                url=build_url(
                    self.endpoint,
                    f"/{self.library_type}/{self.library_id}/items/{value}",
                ),
                json=child_payload,
                headers=dict(uheaders),
            )
            self.request = presp
            try:
                presp.raise_for_status()
            except httpx.HTTPError as exc:
                error_handler(self, presp, exc)
            backoff = presp.headers.get("backoff") or presp.headers.get(
                "retry-after",
            )
            if backoff:
                self._set_backoff(backoff)
    return resp

def create_collection(self, payload, last_modified=None):
    """Alias for create_collections to preserve backward compatibility"""
    return self.create_collections(payload, last_modified)

def create_collections(self, payload, last_modified=None):
    """Create new Zotero collections
    Accepts one argument, a list of dicts containing the following keys:

    'name': the name of the collection
    'parentCollection': OPTIONAL, the parent collection to which you wish to add this
    """
    # no point in proceeding if there's no 'name' key
    for item in payload:
        if "name" not in item:
            msg = "The dict you pass must include a 'name' key"
            raise ze.ParamNotPassedError(msg)
        # add a blank 'parentCollection' key if it hasn't been passed
        if "parentCollection" not in item:
            item["parentCollection"] = ""
    headers = {"Zotero-Write-Token": token()}
    if last_modified is not None:
        headers["If-Unmodified-Since-Version"] = str(last_modified)
    self._check_backoff()
    req = self.client.post(
        url=build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/collections",
        ),
        headers=headers,
        content=json.dumps(payload),
    )
    self.request = req
    try:
        req.raise_for_status()
    except httpx.HTTPError as exc:
        error_handler(self, req, exc)
    backoff = req.headers.get("backoff") or req.headers.get("retry-after")
    if backoff:
        self._set_backoff(backoff)
    return req.json()
@backoff_check
def update_collection(self, payload, last_modified=None):
    """Update a Zotero collection property such as 'name'
    Accepts one argument, a dict containing collection data retrieved
    using e.g. 'collections()'
    """
    version = payload["version"] if last_modified is None else last_modified
    key = payload["key"]
    headers = {
        "If-Unmodified-Since-Version": str(version),
        "Content-Type": "application/json",
    }
    return self.client.put(
        url=build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/collections/{key}",
        ),
        headers=headers,
        content=json.dumps(payload),
    )

def attachment_simple(self, files, parentid=None):
    """Add attachments using filenames as title
    Arguments:
    One or more file paths to add as attachments:
    An optional Item ID, which will create child attachments
    """
    base = self._attachment_template("imported_file")
    to_add = []
    for fname in files:
        entry = base.copy()
        entry["title"] = Path(fname).name
        entry["filename"] = fname
        to_add.append(entry)
    if parentid:
        return self._attachment(to_add, parentid)
    return self._attachment(to_add)

def attachment_both(self, files, parentid=None):
    """Add child attachments using title, filename
    Arguments:
    One or more lists or tuples containing title, file path
    An optional Item ID, which will create child attachments
    """
    base = self._attachment_template("imported_file")
    to_add = []
    for title, fname in files:
        entry = base.copy()
        entry["title"] = title
        entry["filename"] = fname
        to_add.append(entry)
    if parentid:
        return self._attachment(to_add, parentid)
    return self._attachment(to_add)

@backoff_check
def update_item(self, payload, last_modified=None):
    """Update an existing item
    Accepts one argument, a dict containing Item data
    """
    checked = self.check_items([payload])[0]
    version = payload["version"] if last_modified is None else last_modified
    key = payload["key"]
    return self.client.patch(
        url=build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/items/{key}",
        ),
        headers={"If-Unmodified-Since-Version": str(version)},
        content=json.dumps(checked),
    )

def update_items(self, payload):
    """Update existing items
    Accepts one argument, a list of dicts containing Item data
    """
    cleaned = [self.check_items([p])[0] for p in payload]
    # the API only accepts 50 items at a time, so split anything longer
    for batch in chunks(cleaned, DEFAULT_NUM_ITEMS):
        self._check_backoff()
        resp = self.client.post(
            url=build_url(
                self.endpoint,
                f"/{self.library_type}/{self.library_id}/items/",
            ),
            json=batch,
        )
        self.request = resp
        try:
            resp.raise_for_status()
        except httpx.HTTPError as exc:
            error_handler(self, resp, exc)
        delay = resp.headers.get("backoff") or resp.headers.get("retry-after")
        if delay:
            self._set_backoff(delay)
    return True

def update_collections(self, payload):
    """Update existing collections
    Accepts one argument, a list of dicts containing Collection data
    """
    cleaned = [self.check_items([p])[0] for p in payload]
    # the API only accepts 50 items at a time, so split anything longer
    for batch in chunks(cleaned, DEFAULT_NUM_ITEMS):
        self._check_backoff()
        resp = self.client.post(
            url=build_url(
                self.endpoint,
                f"/{self.library_type}/{self.library_id}/collections/",
            ),
            json=batch,
        )
        self.request = resp
        try:
            resp.raise_for_status()
        except httpx.HTTPError as exc:
            error_handler(self, resp, exc)
        delay = resp.headers.get("backoff") or resp.headers.get("retry-after")
        if delay:
            self._set_backoff(delay)
    return True
@backoff_check
def addto_collection(self, collection, payload):
    """Add item to a collection
    Accepts two arguments:
    The collection ID, and an item dict
    """
    ident = payload["key"]
    modified = payload["version"]
    # add the collection data from the item
    modified_collections = payload["data"]["collections"] + [collection]
    headers = {"If-Unmodified-Since-Version": str(modified)}
    return self.client.patch(
        url=build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/items/{ident}",
        ),
        json={"collections": modified_collections},
        headers=headers,
    )

@backoff_check
def deletefrom_collection(self, collection, payload):
    """Delete an item from a collection
    Accepts two arguments:
    The collection ID, and an item dict
    """
    ident = payload["key"]
    modified = payload["version"]
    # strip the collection data from the item
    modified_collections = [
        c for c in payload["data"]["collections"] if c != collection
    ]
    headers = {"If-Unmodified-Since-Version": str(modified)}
    return self.client.patch(
        url=build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/items/{ident}",
        ),
        json={"collections": modified_collections},
        headers=headers,
    )

@backoff_check
def delete_tags(self, *payload):
    """Delete a group of tags
    pass in up to 50 tags, or use *[tags]
    """
    if len(payload) > DEFAULT_NUM_ITEMS:
        msg = f"Only {DEFAULT_NUM_ITEMS} tags or fewer may be deleted"
        raise ze.TooManyItemsError(msg)
    modified_tags = " || ".join(list(payload))
    # first, get version data by getting one tag
    self.tags(limit=1)
    headers = {
        "If-Unmodified-Since-Version": self.request.headers[
            "last-modified-version"
        ],
    }
    return self.client.delete(
        url=build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/tags",
        ),
        params={"tag": modified_tags},
        headers=headers,
    )

@backoff_check
def delete_item(self, payload, last_modified=None):
    """Delete Items from a Zotero library
    Accepts a single argument:
        a dict containing item data
        OR a list of dicts containing item data
    """
    params = None
    if isinstance(payload, list):
        params = {"itemKey": ",".join([p["key"] for p in payload])}
        modified = last_modified if last_modified is not None else payload[0]["version"]
        url = build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/items",
        )
    else:
        ident = payload["key"]
        modified = last_modified if last_modified is not None else payload["version"]
        url = build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/items/{ident}",
        )
    headers = {"If-Unmodified-Since-Version": str(modified)}
    return self.client.delete(url=url, params=params, headers=headers)

@backoff_check
def delete_collection(self, payload, last_modified=None):
    """Delete a Collection from a Zotero library
    Accepts a single argument:
        a dict containing collection data
        OR a list of dicts containing collection data
    """
    # Doc fix: the original docstring said "item data"; this method
    # operates on collection dicts (collectionKey / collections routes).
    params = None
    if isinstance(payload, list):
        params = {"collectionKey": ",".join([p["key"] for p in payload])}
        modified = last_modified if last_modified is not None else payload[0]["version"]
        url = build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/collections",
        )
    else:
        ident = payload["key"]
        modified = last_modified if last_modified is not None else payload["version"]
        url = build_url(
            self.endpoint,
            f"/{self.library_type}/{self.library_id}/collections/{ident}",
        )
    headers = {"If-Unmodified-Since-Version": str(modified)}
    return self.client.delete(url=url, params=params, headers=headers)


def error_handler(zot, req, exc=None):
    """Error handler for HTTP requests

    Maps known status codes to Pyzotero exceptions; for 429 it records the
    server-requested backoff on `zot` instead of raising (unless no delay
    header was sent, in which case TooManyRetriesError is raised).
    """
    error_codes = {
        400: ze.UnsupportedParamsError,
        401: ze.UserNotAuthorisedError,
        403: ze.UserNotAuthorisedError,
        404: ze.ResourceNotFoundError,
        409: ze.ConflictError,
        412: ze.PreConditionFailedError,
        413: ze.RequestEntityTooLargeError,
        428: ze.PreConditionRequiredError,
        429: ze.TooManyRequestsError,
    }

    def err_msg(req):
        """Return a nicely-formatted error message"""
        return f"\nCode: {req.status_code}\nURL: {req.url!s}\nMethod: {req.request.method}\nResponse: {req.text}"

    # Fix: single dict lookup (the original looked the code up twice), and
    # removed a garbled leftover review-artifact comment ("# ← Direct indexing")
    exception = error_codes.get(req.status_code)
    if exception:
        # check to see whether it's 429
        if req.status_code == httpx.codes.TOO_MANY_REQUESTS:
            # try to get backoff or delay duration
            delay = req.headers.get("backoff") or req.headers.get("retry-after")
            if not delay:
                msg = "You are being rate-limited and no backoff or retry duration has been received from the server. Try again later"
                raise ze.TooManyRetriesError(msg)
            zot._set_backoff(delay)
        elif not exc:
            raise exception(err_msg(req))
        else:
            raise exception(err_msg(req)) from exc
    elif not exc:
        raise ze.HTTPError(err_msg(req))
    else:
        raise ze.HTTPError(err_msg(req)) from exc
class SavedSearch:
    """Saved search functionality
    See https://github.com/zotero/zotero/blob/master/chrome/content/zotero/xpcom/data/searchConditions.js
    """

    def __init__(self, zinstance):
        super().__init__()
        self.zinstance = zinstance
        # every search condition dict must carry exactly these keys
        self.searchkeys = ("condition", "operator", "value")
        # always exclude these fields from zotero.item_keys()
        self.excluded_items = (
            "accessDate",
            "date",
            "pages",
            "section",
            "seriesNumber",
            "issue",
        )
        self.operators = {
            # this is a bit hacky, but I can't be bothered with Python's enums
            "is": "is",
            "isNot": "isNot",
            "beginsWith": "beginsWith",
            "contains": "contains",
            "doesNotContain": "doesNotContain",
            "isLessThan": "isLessThan",
            "isGreaterThan": "isGreaterThan",
            "isBefore": "isBefore",
            "isAfter": "isAfter",
            "isInTheLast": "isInTheLast",
            "any": "any",
            "all": "all",
            "true": "true",
            "false": "false",
        }
        # common groupings of operators
        self.groups = {
            "A": (self.operators["true"], self.operators["false"]),
            "B": (self.operators["any"], self.operators["all"]),
            "C": (
                self.operators["is"],
                self.operators["isNot"],
                self.operators["contains"],
                self.operators["doesNotContain"],
            ),
            "D": (self.operators["is"], self.operators["isNot"]),
            "E": (
                self.operators["is"],
                self.operators["isNot"],
                self.operators["isBefore"],
                self.operators["isInTheLast"],
            ),
            "F": (self.operators["contains"], self.operators["doesNotContain"]),
            "G": (
                self.operators["is"],
                self.operators["isNot"],
                self.operators["contains"],
                self.operators["doesNotContain"],
                self.operators["isLessThan"],
                self.operators["isGreaterThan"],
            ),
            "H": (
                self.operators["is"],
                self.operators["isNot"],
                self.operators["beginsWith"],
            ),
            # Fix: the original read ``(self.operators["is"])`` - without the
            # trailing comma that is the *string* "is", not a 1-tuple, so
            # _validate iterated its characters and rejected every valid
            # 'tempTable' condition.
            "I": (self.operators["is"],),
        }
        self.conditions_operators = {
            "deleted": self.groups["A"],
            "noChildren": self.groups["A"],
            "unfiled": self.groups["A"],
            "publications": self.groups["A"],
            "retracted": self.groups["A"],
            "includeParentsAndChildren": self.groups["A"],
            "includeParents": self.groups["A"],
            "includeChildren": self.groups["A"],
            "recursive": self.groups["A"],
            "joinMode": self.groups["B"],
            "quicksearch-titleCreatorYear": self.groups["C"],
            "quicksearch-titleCreatorYearNote": self.groups["C"],
            "quicksearch-fields": self.groups["C"],
            "quicksearch-everything": self.groups["C"],
            "collectionID": self.groups["D"],
            "savedSearchID": self.groups["D"],
            "collection": self.groups["D"],
            "savedSearch": self.groups["D"],
            "dateAdded": self.groups["E"],
            "dateModified": self.groups["E"],
            "itemType": self.groups["D"],
            "fileTypeID": self.groups["D"],
            "tagID": self.groups["D"],
            "tag": self.groups["C"],
            "note": self.groups["F"],
            "childNote": self.groups["F"],
            "creator": self.groups["C"],
            "lastName": self.groups["C"],
            "field": self.groups["C"],
            "datefield": self.groups["E"],
            "year": self.groups["C"],
            "numberfield": self.groups["G"],
            "libraryID": self.groups["D"],
            "key": self.groups["H"],
            "itemID": self.groups["D"],
            "annotationText": self.groups["F"],
            "annotationComment": self.groups["F"],
            "fulltextWord": self.groups["F"],
            "fulltextContent": self.groups["F"],
            "tempTable": self.groups["I"],
        }
        ###########
        # ALIASES #
        ###########
        # aliases for numberfield
        pagefields = (
            "pages",
            "numPages",
            "numberOfVolumes",
            "section",
            "seriesNumber",
            "issue",
        )
        for pf in pagefields:
            self.conditions_operators[pf] = self.conditions_operators.get("numberfield")
        # aliases for datefield
        datefields = ("accessDate", "date", "dateDue", "accepted")
        for df in datefields:
            self.conditions_operators[df] = self.conditions_operators.get("datefield")
        # aliases for field - this makes a blocking API call unless item types have been cached
        item_fields = [
            itm["field"]
            for itm in self.zinstance.item_fields()
            if itm["field"] not in set(self.excluded_items)
        ]
        for itf in item_fields:
            self.conditions_operators[itf] = self.conditions_operators.get("field")

    def _validate(self, conditions):
        """Validate saved search conditions, raising an error if any contain invalid operators"""
        allowed_keys = set(self.searchkeys)
        operators_set = set(self.operators.keys())
        for condition in conditions:
            if set(condition.keys()) != allowed_keys:
                msg = f"Keys must be all of: {', '.join(self.searchkeys)}"
                raise ze.ParamNotPassedError(
                    msg,
                )
            if condition.get("operator") not in operators_set:
                msg = f"You have specified an unknown operator: {condition.get('operator')}"
                raise ze.ParamNotPassedError(
                    msg,
                )
            # dict keys of allowed operators for the current condition
            permitted_operators = self.conditions_operators.get(
                condition.get("condition"),
            )
            if permitted_operators is None:
                msg = f"Unknown condition: {condition.get('condition')}"
                raise ze.ParamNotPassedError(msg)
            # transform these into values
            permitted_operators_list = {
                op_value
                for op in permitted_operators
                if (op_value := self.operators.get(op)) is not None
            }
            if condition.get("operator") not in permitted_operators_list:
                msg = f"You may not use the '{condition.get('operator')}' operator when selecting the '{condition.get('condition')}' condition. \nAllowed operators: {', '.join(list(permitted_operators_list))}"
                raise ze.ParamNotPassedError(
                    msg,
                )
class Zupload:
    """Zotero file attachment helper
    Receives a Zotero instance, file(s) to upload, and optional parent ID
    """

    def __init__(self, zinstance, payload, parentid=None, basedir=None):
        super().__init__()
        self.zinstance = zinstance
        self.payload = payload
        self.parentid = parentid
        if basedir is None:
            self.basedir = Path()
        elif isinstance(basedir, Path):
            self.basedir = basedir
        else:
            self.basedir = Path(basedir)

    def _verify(self, payload):
        """Ensure that all files to be attached exist
        open()'s better than exists(), cos it avoids a race condition
        """
        if not payload:  # Check payload has nonzero length
            raise ze.ParamNotPassedError
        for templt in payload:
            if Path(str(self.basedir.joinpath(templt["filename"]))).is_file():
                try:
                    # if it is a file, try to open it, and catch the error
                    with Path(str(self.basedir.joinpath(templt["filename"]))).open():
                        pass
                except OSError:
                    msg = f"The file at {self.basedir.joinpath(templt['filename'])!s} couldn't be opened or found."
                    raise ze.FileDoesNotExistError(
                        msg,
                    ) from None
            # no point in continuing if the file isn't a file
            else:
                msg = f"The file at {self.basedir.joinpath(templt['filename'])!s} couldn't be opened or found."
                raise ze.FileDoesNotExistError(
                    msg,
                )

    def _create_prelim(self):
        """Step 0: Register intent to upload files"""
        self._verify(self.payload)
        if "key" in self.payload[0] and self.payload[0]["key"]:
            if next((i for i in self.payload if "key" not in i), False):
                msg = "Can't pass payload entries with and without keys to Zupload"
                raise ze.UnsupportedParamsError(
                    msg,
                )
            return None  # Don't do anything if payload comes with keys
        # Set contentType for each attachment if not already provided
        for item in self.payload:
            if not item.get("contentType"):
                filepath = str(self.basedir.joinpath(item["filename"]))
                detected_type = mimetypes.guess_type(filepath)[0]
                item["contentType"] = detected_type or "application/octet-stream"
        liblevel = "/{t}/{u}/items"
        # Create one or more new attachments
        headers = {"Zotero-Write-Token": token(), "Content-Type": "application/json"}
        # If we have a Parent ID, add it as a parentItem
        if self.parentid:
            for child in self.payload:
                child["parentItem"] = self.parentid
        to_send = json.dumps(self.payload)
        self.zinstance._check_backoff()
        req = self.zinstance.client.post(
            url=build_url(
                self.zinstance.endpoint,
                liblevel.format(
                    t=self.zinstance.library_type,
                    u=self.zinstance.library_id,
                ),
            ),
            data=to_send,
            headers=headers,
        )
        try:
            req.raise_for_status()
        except httpx.HTTPError as exc:
            error_handler(self.zinstance, req, exc)
        backoff = req.headers.get("backoff") or req.headers.get("retry-after")
        if backoff:
            self.zinstance._set_backoff(backoff)
        data = req.json()
        # write the newly assigned item keys back into the payload entries
        for k in data["success"]:
            self.payload[int(k)]["key"] = data["success"][k]
        return data

    def _get_auth(self, attachment, reg_key, md5=None):
        """Step 1: get upload authorisation for a file"""
        mtypes = mimetypes.guess_type(attachment)
        digest = hashlib.md5()  # noqa: S324
        with Path(attachment).open("rb") as att:
            for chunk in iter(lambda: att.read(8192), b""):
                digest.update(chunk)
        auth_headers = {"Content-Type": "application/x-www-form-urlencoded"}
        if not md5:
            auth_headers["If-None-Match"] = "*"
        else:
            # docs specify that for existing file we use this
            auth_headers["If-Match"] = md5
        data = {
            "md5": digest.hexdigest(),
            "filename": Path(attachment).name,
            "filesize": Path(attachment).stat().st_size,
            "mtime": str(int(Path(attachment).stat().st_mtime * 1000)),
            "contentType": mtypes[0] or "application/octet-stream",
            "charset": mtypes[1],
            "params": 1,
        }
        self.zinstance._check_backoff()
        auth_req = self.zinstance.client.post(
            url=build_url(
                self.zinstance.endpoint,
                f"/{self.zinstance.library_type}/{self.zinstance.library_id}/items/{reg_key}/file",
            ),
            data=data,
            headers=auth_headers,
        )
        try:
            auth_req.raise_for_status()
        except httpx.HTTPError as exc:
            error_handler(self.zinstance, auth_req, exc)
        backoff = auth_req.headers.get("backoff") or auth_req.headers.get("retry-after")
        if backoff:
            self.zinstance._set_backoff(backoff)
        return auth_req.json()

    def _upload_file(self, authdata, attachment, reg_key):
        """Step 2: auth successful, and file not on server
        zotero.org/support/dev/server_api/file_upload#a_full_upload

        reg_key isn't used, but we need to pass it through to Step 3
        """
        upload_dict = authdata["params"]
        # pass tuple of tuples (not dict!), to ensure key comes first
        upload_list = [("key", upload_dict.pop("key"))]
        for key, value in upload_dict.items():
            upload_list.append((key, value))
        # Fix: the original left the file handle open
        # (Path(...).open("rb").read()); close it deterministically
        with Path(attachment).open("rb") as att:
            upload_list.append(("file", att.read()))
        upload_pairs = tuple(upload_list)
        try:
            self.zinstance._check_backoff()
            # We use a fresh httpx POST because we don't want our existing Pyzotero headers
            # for a call to the storage upload URL (currently S3)
            upload = httpx.post(
                url=authdata["url"],
                files=upload_pairs,
                headers={"User-Agent": f"Pyzotero/{pz.__version__}"},
            )
        except httpx.ConnectError:
            msg = "ConnectionError"
            raise ze.UploadError(msg) from None
        try:
            upload.raise_for_status()
        except httpx.HTTPError as exc:
            error_handler(self.zinstance, upload, exc)
        backoff = upload.headers.get("backoff") or upload.headers.get("retry-after")
        if backoff:
            self.zinstance._set_backoff(backoff)
        # now check the responses
        return self._register_upload(authdata, reg_key)

    def _register_upload(self, authdata, reg_key):
        """Step 3: upload successful, so register it"""
        reg_headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "If-None-Match": "*",
        }
        reg_data = {"upload": authdata.get("uploadKey")}
        self.zinstance._check_backoff()
        upload_reg = self.zinstance.client.post(
            url=build_url(
                self.zinstance.endpoint,
                f"/{self.zinstance.library_type}/{self.zinstance.library_id}/items/{reg_key}/file",
            ),
            data=reg_data,
            headers=dict(reg_headers),
        )
        try:
            upload_reg.raise_for_status()
        except httpx.HTTPError as exc:
            error_handler(self.zinstance, upload_reg, exc)
        backoff = upload_reg.headers.get("backoff") or upload_reg.headers.get(
            "retry-after",
        )
        if backoff:
            self.zinstance._set_backoff(backoff)

    def upload(self):
        """File upload functionality

        Goes through upload steps 0 - 3 (private class methods), and returns
        a dict noting success, failure, or unchanged
        (returning the payload entries with that property as a list for each status)
        """
        result = {"success": [], "failure": [], "unchanged": []}
        self._create_prelim()
        for item in self.payload:
            if "key" not in item:
                result["failure"].append(item)
                continue
            attach = str(self.basedir.joinpath(item["filename"]))
            authdata = self._get_auth(attach, item["key"], md5=item.get("md5", None))
            # no need to keep going if the file exists
            if authdata.get("exists"):
                result["unchanged"].append(item)
                continue
            self._upload_file(authdata, attach, item["key"])
            result["success"].append(item)
        return result


# Public surface of the module. Kept alphabetically sorted (the auto-sorter
# had scattered the original grouping comments, e.g. filing "SavedSearch"
# under "Constants"); see each entry's home module for its category.
__all__ = [
    "DEFAULT_ITEM_LIMIT",
    "DEFAULT_NUM_ITEMS",
    "DEFAULT_TIMEOUT",
    "ONE_HOUR",
    "SavedSearch",
    "Zotero",
    "Zupload",
    "__api_version__",
    "__author__",
    "backoff_check",
    "build_url",
    "chunks",
    "cleanwrap",
    "error_handler",
    "merge_params",
    "retrieve",
    "ss_wrap",
    "tcache",
    "timeout",
    "token",
    "ze",
]
+52 -135
src/pyzotero/zotero_errors.py
··· 1 - """ 2 - zotero_errors.py 3 - 4 - Created by Stephan Hügel on 2011-03-04 5 - 6 - This file is part of Pyzotero. 7 - """ 8 - 9 - 10 - # Define some exceptions 11 - class PyZoteroError(Exception): 12 - """Generic parent exception""" 13 - 14 - pass 1 + """Backwards-compatible re-exports for pyzotero.zotero_errors module. 15 2 3 + This module maintains backwards compatibility for code that imports from 4 + pyzotero.zotero_errors. New code should import from pyzotero.errors. 16 5 17 - class ParamNotPassedError(PyZoteroError): 18 - """Raised if a parameter which is required isn't passed""" 6 + Example: 7 + # Old style (still works) 8 + from pyzotero import zotero_errors as ze 19 9 20 - pass 21 - 22 - 23 - class CallDoesNotExistError(PyZoteroError): 24 - """Raised if the specified API call doesn't exist""" 25 - 26 - pass 27 - 28 - 29 - class UnsupportedParamsError(PyZoteroError): 30 - """Raised when unsupported parameters are passed""" 31 - 32 - pass 33 - 34 - 35 - class UserNotAuthorisedError(PyZoteroError): 36 - """Raised when the user is not allowed to retrieve the resource""" 37 - 38 - pass 39 - 40 - 41 - class TooManyItemsError(PyZoteroError): 42 - """Raised when too many items are passed to a Write API method""" 10 + # New style (preferred) 11 + from pyzotero import errors as ze 43 12 44 - pass 13 + """ 45 14 15 + # Re-export all exceptions for backwards compatibility 16 + from pyzotero.errors import ( 17 + CallDoesNotExistError, 18 + ConflictError, 19 + CouldNotReachURLError, 20 + FileDoesNotExistError, 21 + HTTPError, 22 + InvalidItemFieldsError, 23 + MissingCredentialsError, 24 + ParamNotPassedError, 25 + PreConditionFailedError, 26 + PreConditionRequiredError, 27 + PyZoteroError, 28 + RequestEntityTooLargeError, 29 + ResourceNotFoundError, 30 + TooManyItemsError, 31 + TooManyRequestsError, 32 + TooManyRetriesError, 33 + UnsupportedParamsError, 34 + UploadError, 35 + UserNotAuthorisedError, 36 + ) 46 37 47 - class MissingCredentialsError(PyZoteroError): 48 
- """ 49 - Raised when an attempt is made to create a Zotero instance 50 - without providing both the user ID and the user key 51 - """ 52 - 53 - pass 54 - 55 - 56 - class InvalidItemFieldsError(PyZoteroError): 57 - """Raised when an attempt is made to create/update items w/invalid fields""" 58 - 59 - pass 60 - 61 - 62 - class ResourceNotFoundError(PyZoteroError): 63 - """Raised when a resource (item, collection etc.) could not be found""" 64 - 65 - pass 66 - 67 - 68 - class HTTPError(PyZoteroError): 69 - """Raised for miscellaneous URLLib errors""" 70 - 71 - pass 72 - 73 - 74 - class CouldNotReachURLError(PyZoteroError): 75 - """Raised when we can't reach a URL""" 76 - 77 - pass 78 - 79 - 80 - class ConflictError(PyZoteroError): 81 - """409 - Raised when the target library is locked""" 82 - 83 - pass 84 - 85 - 86 - class PreConditionFailedError(PyZoteroError): 87 - """ 88 - 412 - Raised when the provided X-Zotero-Write-Token has already been 89 - submitted 90 - """ 91 - 92 - pass 93 - 94 - 95 - class RequestEntityTooLargeError(PyZoteroError): 96 - """ 97 - 413 - The upload would exceed the storage quota of the library owner. 98 - """ 99 - 100 - pass 101 - 102 - 103 - class PreConditionRequiredError(PyZoteroError): 104 - """ 105 - 428 - Raised when If-Match or If-None-Match was not provided. 106 - """ 107 - 108 - pass 109 - 110 - 111 - class TooManyRequestsError(PyZoteroError): 112 - """ 113 - 429 - Raised when there are too many unfinished uploads. 114 - Try again after the number of seconds specified in the Retry-After header. 
115 - """ 116 - 117 - pass 118 - 119 - 120 - class FileDoesNotExistError(PyZoteroError): 121 - """ 122 - Raised when a file path to be attached can't be opened (or doesn't exist) 123 - """ 124 - 125 - pass 126 - 127 - 128 - class TooManyRetriesError(PyZoteroError): 129 - """ 130 - Raise after the backoff period for new requests exceeds 32s 131 - """ 132 - 133 - pass 134 - 135 - 136 - class UploadError(PyZoteroError): 137 - """ 138 - Raise if the connection drops during upload or some other non-HTTP error code is returned 139 - """ 140 - 141 - pass 38 + __all__ = [ 39 + "CallDoesNotExistError", 40 + "ConflictError", 41 + "CouldNotReachURLError", 42 + "FileDoesNotExistError", 43 + "HTTPError", 44 + "InvalidItemFieldsError", 45 + "MissingCredentialsError", 46 + "ParamNotPassedError", 47 + "PreConditionFailedError", 48 + "PreConditionRequiredError", 49 + "PyZoteroError", 50 + "RequestEntityTooLargeError", 51 + "ResourceNotFoundError", 52 + "TooManyItemsError", 53 + "TooManyRequestsError", 54 + "TooManyRetriesError", 55 + "UnsupportedParamsError", 56 + "UploadError", 57 + "UserNotAuthorisedError", 58 + ]