Coverage for notion_client / helpers.py: 100%

128 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-02 18:32 +0000

1"""Utility functions for notion-sdk-py.""" 

2 

3import re 

4from typing import ( 

5 Any, 

6 AsyncGenerator, 

7 Awaitable, 

8 Callable, 

9 Dict, 

10 Generator, 

11 List, 

12 Optional, 

13) 

14from urllib.parse import urlparse 

15from uuid import UUID 

16 

17 

def pick(base: Dict[Any, Any], *keys: str) -> Dict[Any, Any]:
    """Build a new dict holding only the requested `keys` that exist in `base`.

    Keys absent from `base` are skipped. A `start_cursor` entry whose value is
    `None` is dropped as well; any other `None` value is kept as is.
    """
    return {
        name: base.get(name)
        for name in keys
        if name in base and not (name == "start_cursor" and base.get(name) is None)
    }

29 

30 

def get_url(object_id: str) -> str:
    """Build the notion.so URL for the object identified by `object_id`.

    The id is parsed with `uuid.UUID` and rendered in its compact hex form,
    so a malformed id string raises `ValueError`.
    """
    compact_id = UUID(object_id).hex
    return "https://notion.so/" + compact_id

34 

35 

def get_id(url: str) -> str:
    """Extract the object id from a notion.so URL.

    The id is read from the last 32 characters of the URL path and returned
    in hyphenated UUID form.

    Raises:
        ValueError: if the host is neither `notion.so` nor `www.notion.so`,
            if the path is shorter than 32 characters, or if the trailing
            32 characters do not form a valid UUID.
    """
    parts = urlparse(url)
    allowed_hosts = ("notion.so", "www.notion.so")
    if parts.netloc not in allowed_hosts:
        raise ValueError("Not a valid Notion URL.")

    url_path = parts.path
    if len(url_path) < 32:
        raise ValueError("The path in the URL seems to be incorrect.")

    return str(UUID(url_path[-32:]))

46 

47 

def iterate_paginated_api(
    function: Callable[..., Any], **kwargs: Any
) -> Generator[Any, None, None]:
    """Lazily yield every item of a paginated Notion API, page by page.

    `function` is called once per page with the caller's `kwargs` plus a
    `start_cursor` keyword; a `start_cursor` supplied by the caller seeds the
    first request. Each entry of the page's `results` is yielded in order.
    Paging stops once a page has a falsy `has_more` or a falsy `next_cursor`.
    """
    cursor = kwargs.pop("start_cursor", None)
    more_pages = True

    while more_pages:
        page = function(**kwargs, start_cursor=cursor)
        for item in page.get("results"):
            yield item

        cursor = page.get("next_cursor")
        more_pages = bool(page.get("has_more")) and bool(cursor)

62 

63 

def collect_paginated_api(function: Callable[..., Any], **kwargs: Any) -> List[Any]:
    """Gather every item produced by `iterate_paginated_api` into one list.

    `function` and `kwargs` are forwarded unchanged to the iterator.
    """
    return list(iterate_paginated_api(function, **kwargs))

67 

68 

async def async_iterate_paginated_api(
    function: Callable[..., Awaitable[Any]], **kwargs: Any
) -> AsyncGenerator[Any, None]:
    """Return an async iterator over the results of any paginated Notion API.

    `function` is awaited once per page with the caller's `kwargs` plus a
    `start_cursor` keyword; a `start_cursor` supplied by the caller seeds the
    first request. Each entry of the page's `results` is yielded in order.

    Iteration ends when a page reports no further data: `has_more` is missing
    or falsy, or `next_cursor` is missing or empty.
    """
    next_cursor = kwargs.pop("start_cursor", None)

    while True:
        response = await function(**kwargs, start_cursor=next_cursor)
        for result in response.get("results"):
            yield result

        next_cursor = response.get("next_cursor")
        # Same stop condition as the synchronous iterator: tolerate a missing
        # `has_more` key (no KeyError) and treat an empty cursor as the end,
        # using short-circuit `or` rather than bitwise `|`.
        if not response.get("has_more") or not next_cursor:
            return

83 

84 

async def async_collect_paginated_api(
    function: Callable[..., Awaitable[Any]], **kwargs: Any
) -> List[Any]:
    """Gather every item produced by `async_iterate_paginated_api` into a list.

    `function` and `kwargs` are forwarded unchanged to the async iterator.
    """
    collected: List[Any] = []
    async for item in async_iterate_paginated_api(function, **kwargs):
        collected.append(item)
    return collected

90 

91 

def is_full_block(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full block object.

    True when the `object` field equals `"block"` and a `type` key is present.
    """
    if response.get("object") != "block":
        return False
    return "type" in response

95 

96 

def is_full_page(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full page object.

    True when the `object` field equals `"page"` and a `url` key is present.
    """
    if response.get("object") != "page":
        return False
    return "url" in response

100 

101 

def is_full_data_source(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full data source object.

    Only the `object` field is inspected: it must equal `"data_source"`.
    """
    object_kind = response.get("object")
    return object_kind == "data_source"

105 

106 

def is_full_database(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full database object.

    True when the `object` field equals `"database"` and a `title` key is
    present.
    """
    if response.get("object") != "database":
        return False
    return "title" in response

110 

111 

def is_full_page_or_data_source(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full data source or a full page.

    Responses whose `object` field is `"data_source"` are checked with
    `is_full_data_source`; everything else is checked with `is_full_page`.
    """
    looks_like_data_source = response.get("object") == "data_source"
    checker = is_full_data_source if looks_like_data_source else is_full_page
    return checker(response)

117 

118 

def is_full_user(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full user object.

    The only check performed is the presence of a `type` key; the `object`
    field is not inspected.
    """
    has_type = "type" in response
    return has_type

122 

123 

def is_full_comment(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full comment object.

    The only check performed is the presence of a `type` key; the `object`
    field is not inspected.
    """
    has_type = "type" in response
    return has_type

127 

128 

def is_full_view(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full view object.

    The only check performed is the presence of a `type` key; the `object`
    field is not inspected.
    """
    has_type = "type" in response
    return has_type

132 

133 

def is_text_rich_text_item_response(rich_text: Dict[Any, Any]) -> bool:
    """Tell whether the rich text item's `type` field equals `"text"`."""
    item_type = rich_text.get("type")
    return item_type == "text"

137 

138 

def is_equation_rich_text_item_response(rich_text: Dict[Any, Any]) -> bool:
    """Tell whether the rich text item's `type` field equals `"equation"`."""
    item_type = rich_text.get("type")
    return item_type == "equation"

142 

143 

def is_mention_rich_text_item_response(rich_text: Dict[Any, Any]) -> bool:
    """Tell whether the rich text item's `type` field equals `"mention"`."""
    item_type = rich_text.get("type")
    return item_type == "mention"

147 

148 

149def _format_uuid(compact_uuid: str) -> str: 

150 """Format a compact UUID (32 chars) into standard format with hyphens.""" 

151 if len(compact_uuid) != 32: 

152 raise ValueError("UUID must be exactly 32 characters") 

153 

154 return ( 

155 f"{compact_uuid[:8]}-{compact_uuid[8:12]}-{compact_uuid[12:16]}-" 

156 f"{compact_uuid[16:20]}-{compact_uuid[20:]}" 

157 ) 

158 

159 

def extract_notion_id(url_or_id: str) -> Optional[str]:
    """Extract a Notion ID from a Notion URL or return the input if it's already a valid ID.

    Lookup order (first hit wins):

    1. the input itself, when it is a hyphenated UUID or a compact 32-char
       hex ID;
    2. the first 32-char hex run located before any `?` or `#`, i.e. in the
       URL path;
    3. a `p`, `page_id` or `database_id` query parameter holding a hyphenated
       or compact ID;
    4. any 32-char hex run anywhere in the input.

    Path IDs are deliberately tried before query parameters so that a database
    URL carrying extra query IDs still yields the database ID.

    Inputs containing `://` must point at `notion.so` or `notion.site`
    (optionally prefixed with `www.`); other URLs yield `None`.

    Returns the extracted UUID in standard format (lowercase, with hyphens)
    or `None` if the input is empty, not a string, or contains no ID.

    ```python
    # Database URL with view ID - extracts database ID, not view ID
    extract_notion_id('https://notion.so/workspace/DB-abc123def456789012345678901234ab?v=viewid123')
    # Returns: 'abc123de-f456-7890-1234-5678901234ab'  # database ID

    # Already formatted UUID
    extract_notion_id('12345678-1234-1234-1234-123456789abc')
    # Returns: '12345678-1234-1234-1234-123456789abc'
    ```
    """
    if not url_or_id or not isinstance(url_or_id, str):
        return None

    trimmed = url_or_id.strip()

    # Check if it's already a properly formatted UUID
    uuid_pattern = re.compile(
        r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", re.IGNORECASE
    )
    if uuid_pattern.match(trimmed):
        return trimmed.lower()

    # Check if it's a compact UUID (32 chars, no hyphens)
    compact_uuid_pattern = re.compile(r"^[0-9a-f]{32}$", re.IGNORECASE)
    if compact_uuid_pattern.match(trimmed):
        # str(UUID(...)) renders the canonical lowercase hyphenated form.
        return str(UUID(trimmed))

    # For URLs, check if it's a valid Notion domain
    if "://" in trimmed:
        if not re.search(r"://(?:www\.)?notion\.(?:so|site)/", trimmed, re.IGNORECASE):
            return None

    # Path first: look only at what precedes the query string / fragment so an
    # ID in a query parameter cannot shadow the ID in the path.
    path_part = re.split(r"[?#]", trimmed, maxsplit=1)[0]
    path_match = re.search(r"[0-9a-f]{32}", path_part, re.IGNORECASE)
    if path_match:
        return str(UUID(path_match.group(0)))

    # Fallback to query parameters if no ID was found in the path
    query_match = re.search(
        r"[?&](?:p|page_id|database_id)=([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}|[0-9a-f]{32})",
        trimmed,
        re.IGNORECASE,
    )
    if query_match:
        # UUID() accepts both the hyphenated and the compact spelling.
        return str(UUID(query_match.group(1)))

    # Last resort: any 32-char hex string in the URL
    any_match = re.search(r"([0-9a-f]{32})", trimmed, re.IGNORECASE)
    if any_match:
        return str(UUID(any_match.group(1)))

    return None

215 

216 

def extract_database_id(database_url: str) -> Optional[str]:
    """Extract a database ID from a Notion URL, or normalise an ID given directly.

    Thin, database-flavoured alias of `extract_notion_id`: the argument is
    forwarded unchanged and the result returned as is.

    Returns the UUID in standard hyphenated format, or `None` if no valid ID
    could be found.
    """
    database_id = extract_notion_id(database_url)
    return database_id

225 

226 

def extract_page_id(page_url: str) -> Optional[str]:
    """Extract a page ID from a Notion URL, or normalise an ID given directly.

    Thin, page-flavoured alias of `extract_notion_id`: the argument is
    forwarded unchanged and the result returned as is.

    Returns the UUID in standard hyphenated format, or `None` if no valid ID
    could be found.
    """
    page_id = extract_notion_id(page_url)
    return page_id

235 

236 

def extract_block_id(url_or_id: str) -> Optional[str]:
    """Extract a block ID from a URL fragment, or normalise an ID given directly.

    A fragment of the form `#block-<id>` or `#<id>` is searched for first,
    where `<id>` is either a hyphenated UUID or a compact 32-char hex ID.
    When no such fragment exists the input is handed to `extract_notion_id`.

    Returns the UUID in standard hyphenated, lowercase format, or `None` if
    the input is empty, not a string, or holds no valid ID.
    """
    if not url_or_id or not isinstance(url_or_id, str):
        return None

    fragment_pattern = r"#(?:block-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}|[0-9a-f]{32})"
    found = re.search(fragment_pattern, url_or_id, re.IGNORECASE)
    if found is None:
        # No block fragment: defer to the general-purpose extractor.
        return extract_notion_id(url_or_id)

    block_id = found.group(1).lower()
    if "-" in block_id:
        # Already hyphenated, nothing left to format.
        return block_id
    return _format_uuid(block_id)

261 

262 

def iterate_data_source_templates(
    function: Callable[..., Any], **kwargs: Any
) -> Generator[Any, None, None]:
    """Lazily yield every template of a data source, page by page.

    `function` is called once per page with the caller's `kwargs` plus a
    `start_cursor` keyword; a `start_cursor` supplied by the caller seeds the
    first request. Entries of each page's `templates` list are yielded in
    order (a missing list counts as empty). Paging stops once a page has a
    falsy `has_more` or a falsy `next_cursor`.

    Example:

    ```python
    for template in iterate_data_source_templates(
        client.data_sources.list_templates,
        data_source_id=data_source_id,
    ):
        print(template["name"], template["is_default"])
    ```
    """
    cursor = kwargs.pop("start_cursor", None)
    more_pages = True

    while more_pages:
        page = function(**kwargs, start_cursor=cursor)
        for item in page.get("templates", []):
            yield item  # pragma: no cover

        cursor = page.get("next_cursor")
        more_pages = bool(page.get("has_more")) and bool(cursor)

288 

289 

def collect_data_source_templates(
    function: Callable[..., Any], **kwargs: Any
) -> List[Any]:
    """Gather every template yielded by `iterate_data_source_templates` into a list.

    `function` and `kwargs` are forwarded unchanged to the iterator.

    Example:

    ```python
    templates = collect_data_source_templates(
        client.data_sources.list_templates,
        data_source_id=data_source_id,
    )
    # Do something with templates.
    ```
    """
    return list(iterate_data_source_templates(function, **kwargs))

306 

307 

async def async_iterate_data_source_templates(
    function: Callable[..., Awaitable[Any]], **kwargs: Any
) -> AsyncGenerator[Any, None]:
    """Lazily yield every template of a data source from an async API.

    `function` is awaited once per page with the caller's `kwargs` plus a
    `start_cursor` keyword; a `start_cursor` supplied by the caller seeds the
    first request. Entries of each page's `templates` list are yielded in
    order (a missing list counts as empty). Paging stops once a page has a
    falsy `has_more` or a falsy `next_cursor`.

    Example:

    ```python
    async for template in async_iterate_data_source_templates(
        async_client.data_sources.list_templates,
        data_source_id=data_source_id,
    ):
        print(template["name"], template["is_default"])
    ```
    """
    cursor = kwargs.pop("start_cursor", None)
    more_pages = True

    while more_pages:
        page = await function(**kwargs, start_cursor=cursor)
        for item in page.get("templates", []):
            yield item  # pragma: no cover

        cursor = page.get("next_cursor")
        more_pages = bool(page.get("has_more")) and bool(cursor)

333 

334 

async def async_collect_data_source_templates(
    function: Callable[..., Awaitable[Any]], **kwargs: Any
) -> List[Any]:
    """Gather every template yielded by `async_iterate_data_source_templates` into a list.

    `function` and `kwargs` are forwarded unchanged to the async iterator.

    Example:

    ```python
    templates = await async_collect_data_source_templates(
        async_client.data_sources.list_templates,
        data_source_id=data_source_id,
    )
    # Do something with templates.
    ```
    """
    collected: List[Any] = []
    async for item in async_iterate_data_source_templates(function, **kwargs):
        collected.append(item)
    return collected