Coverage for notion_client/helpers.py: 100%

104 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-01 19:30 +0000

1"""Utility functions for notion-sdk-py.""" 

2 

3import re 

4from typing import ( 

5 Any, 

6 AsyncGenerator, 

7 Awaitable, 

8 Callable, 

9 Dict, 

10 Generator, 

11 List, 

12 Optional, 

13) 

14from urllib.parse import urlparse 

15from uuid import UUID 

16 

17 

def pick(base: Dict[Any, Any], *keys: str) -> Dict[Any, Any]:
    """Build a new dict holding only the requested keys of `base`.

    Keys that are absent from `base` are left out. A `"start_cursor"` entry
    whose value is `None` is left out as well; every other `None` value is
    copied over unchanged.
    """
    # Pair each requested key that exists in `base` with its value.
    present = ((name, base[name]) for name in keys if name in base)
    return {
        name: value
        for name, value in present
        if not (value is None and name == "start_cursor")
    }

29 

30 

def get_url(object_id: str) -> str:
    """Build the notion.so URL that points at the object with the given id.

    `object_id` is parsed with `UUID`, so a string that `UUID` rejects raises
    `ValueError`. The URL carries the id as 32 hex digits without hyphens.
    """
    compact_id = UUID(object_id).hex
    return "https://notion.so/" + compact_id

34 

35 

def get_id(url: str) -> str:
    """Extract the object id from a notion.so URL.

    The id is taken from the last 32 characters of the URL path and returned
    as a hyphenated UUID string.

    Raises `ValueError` when the host is neither `notion.so` nor
    `www.notion.so`, when the path is shorter than 32 characters, or when
    those last 32 characters are not accepted by `UUID`.
    """
    parts = urlparse(url)

    host_is_notion = parts.netloc in ("notion.so", "www.notion.so")
    if not host_is_notion:
        raise ValueError("Not a valid Notion URL.")

    if len(parts.path) < 32:
        raise ValueError("The path in the URL seems to be incorrect.")

    trailing_id = parts.path[-32:]
    return str(UUID(trailing_id))

46 

47 

def iterate_paginated_api(
    function: Callable[..., Any], **kwargs: Any
) -> Generator[Any, None, None]:
    """Lazily yield every result of a paginated Notion API call.

    `function` is called with `kwargs` plus a `start_cursor` keyword. The
    first call uses the `start_cursor` given in `kwargs` (or `None`); each
    later call uses the `next_cursor` of the previous response. Iteration
    ends after the first response whose `has_more` or `next_cursor` is falsy.
    """
    cursor = kwargs.pop("start_cursor", None)
    more_pages = True

    while more_pages:
        page = function(**kwargs, start_cursor=cursor)
        yield from page.get("results")

        cursor = page.get("next_cursor")
        more_pages = bool(page.get("has_more") and cursor)

62 

63 

def collect_paginated_api(function: Callable[..., Any], **kwargs: Any) -> List[Any]:
    """Run a paginated API call to exhaustion and return all results as a list.

    Arguments are forwarded unchanged to `iterate_paginated_api`.
    """
    return list(iterate_paginated_api(function, **kwargs))

67 

68 

async def async_iterate_paginated_api(
    function: Callable[..., Awaitable[Any]], **kwargs: Any
) -> AsyncGenerator[Any, None]:
    """Return an async iterator over the results of any paginated Notion API.

    `function` is awaited with `kwargs` plus a `start_cursor` keyword. The
    first call uses the `start_cursor` given in `kwargs` (or `None`); each
    later call uses the `next_cursor` of the previous response.

    Iteration ends after the first response whose `has_more` is missing or
    falsy, or whose `next_cursor` is missing, `None` or empty.
    """
    next_cursor = kwargs.pop("start_cursor", None)

    while True:
        response = await function(**kwargs, start_cursor=next_cursor)
        for result in response.get("results"):
            yield result

        next_cursor = response.get("next_cursor")
        # Same stop condition as the synchronous iterate_paginated_api:
        # short-circuit `or` rather than bitwise `|`, `.get` so a response
        # without "has_more" ends the iteration instead of raising KeyError,
        # and an empty cursor is treated as the end rather than re-requested.
        if not response.get("has_more") or not next_cursor:
            return

83 

84 

async def async_collect_paginated_api(
    function: Callable[..., Awaitable[Any]], **kwargs: Any
) -> List[Any]:
    """Await a paginated API call to exhaustion and return all results as a list.

    Arguments are forwarded unchanged to `async_iterate_paginated_api`.
    """
    collected: List[Any] = []
    async for item in async_iterate_paginated_api(function, **kwargs):
        collected.append(item)
    return collected

90 

91 

def is_full_block(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a block object that carries a `"type"` key."""
    if response.get("object") != "block":
        return False
    return "type" in response

95 

96 

def is_full_page(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a page object that carries a `"url"` key."""
    if response.get("object") != "page":
        return False
    return "url" in response

100 

101 

def is_full_database(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a database object that carries a `"title"` key."""
    if response.get("object") != "database":
        return False
    return "title" in response

105 

106 

def is_full_page_or_database(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full database or a full page.

    Responses whose `"object"` is `"database"` are checked with
    `is_full_database`; everything else is checked with `is_full_page`.
    """
    checker = (
        is_full_database if response.get("object") == "database" else is_full_page
    )
    return checker(response)

112 

113 

def is_full_user(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full user, i.e. has a `"type"` key."""
    has_type = "type" in response
    return has_type

117 

118 

def is_full_comment(response: Dict[Any, Any]) -> bool:
    """Tell whether `response` is a full comment, i.e. has a `"type"` key."""
    # NOTE(review): only the presence of "type" is tested, exactly as in
    # is_full_user; confirm that key is what distinguishes a full comment.
    has_type = "type" in response
    return has_type

122 

123 

def is_text_rich_text_item_response(rich_text: Dict[Any, Any]) -> bool:
    """Tell whether the `"type"` of `rich_text` is `"text"`."""
    item_type = rich_text.get("type")
    return item_type == "text"

127 

128 

def is_equation_rich_text_item_response(rich_text: Dict[Any, Any]) -> bool:
    """Tell whether the `"type"` of `rich_text` is `"equation"`."""
    item_type = rich_text.get("type")
    return item_type == "equation"

132 

133 

def is_mention_rich_text_item_response(rich_text: Dict[Any, Any]) -> bool:
    """Tell whether the `"type"` of `rich_text` is `"mention"`."""
    item_type = rich_text.get("type")
    return item_type == "mention"

137 

138 

def _format_uuid(compact_uuid: str) -> str:
    """Insert hyphens into a 32-character compact UUID (8-4-4-4-12 grouping).

    Only the length is checked; the characters are copied through as given.
    Raises `ValueError` when `compact_uuid` is not exactly 32 characters long.
    """
    if len(compact_uuid) != 32:
        raise ValueError("UUID must be exactly 32 characters")

    # Start/end offsets of the five hyphen-separated groups.
    bounds = (0, 8, 12, 16, 20, 32)
    groups = [compact_uuid[start:end] for start, end in zip(bounds, bounds[1:])]
    return "-".join(groups)

148 

149 

def extract_notion_id(url_or_id: str) -> Optional[str]:
    """Extract a Notion ID from a Notion URL or return the input if it's already a valid ID.

    Candidates are tried in this order and the first hit wins:

    1. The whole (whitespace-stripped) input is a UUID, hyphenated or compact.
    2. A 32-character hex run in the part of the input before any `?` or `#`,
       i.e. the URL path. Path IDs are prioritized over query parameters so
       that a database URL opened on a page or view still yields the database ID.
    3. A `p`, `page_id` or `database_id` query parameter holding a UUID.
    4. Any 32-character hex run anywhere in the input.

    An input containing `://` is only accepted when it points at `notion.so`
    or `notion.site` (optionally prefixed with `www.`); otherwise `None` is
    returned.

    Returns the extracted UUID in standard format (lowercase, with hyphens),
    or `None` if the input is empty, not a string, or contains no ID.

    ```python
    # Database URL with view ID - extracts database ID, not view ID
    extract_notion_id('https://notion.so/workspace/DB-abc123def456789012345678901234ab?v=viewid123')
    # Returns: 'abc123de-f456-7890-1234-5678901234ab'  # database ID

    # Already formatted UUID
    extract_notion_id('12345678-1234-1234-1234-123456789abc')
    # Returns: '12345678-1234-1234-1234-123456789abc'
    ```
    """
    if not url_or_id or not isinstance(url_or_id, str):
        return None

    trimmed = url_or_id.strip()

    hyphenated = r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
    compact = r"[0-9a-f]{32}"

    # The input itself is an ID. UUID() accepts both spellings and str()
    # renders the lowercase hyphenated form.
    if re.fullmatch(f"{hyphenated}|{compact}", trimmed, re.IGNORECASE):
        return str(UUID(trimmed))

    # For URLs, check if it's a valid Notion domain
    if "://" in trimmed and not re.search(
        r"://(?:www\.)?notion\.(?:so|site)/", trimmed, re.IGNORECASE
    ):
        return None

    # Direct ID: first 32-char hex run before the query string or fragment.
    # This must run before the query-parameter lookup, otherwise a `p=...`
    # parameter would win over the ID in the path.
    path_match = re.match(rf"[^?#]*?({compact})", trimmed, re.IGNORECASE)
    if path_match:
        return str(UUID(path_match.group(1)))

    # Fallback to query parameters if no direct ID found
    query_match = re.search(
        rf"[?&](?:p|page_id|database_id)=({hyphenated}|{compact})",
        trimmed,
        re.IGNORECASE,
    )
    if query_match:
        return str(UUID(query_match.group(1)))

    # Last resort: any 32-char hex string in the input (at this point it can
    # only sit after a `?` or `#`)
    any_match = re.search(compact, trimmed, re.IGNORECASE)
    if any_match:
        return str(UUID(any_match.group(0)))

    return None

205 

206 

def extract_database_id(database_url: str) -> Optional[str]:
    """Return the database ID found in a Notion URL, or the input normalized if it is an ID.

    Thin alias of `extract_notion_id`, provided for readability in code that
    deals with databases.

    Returns the UUID in standard format (with hyphens) or None if invalid.
    """
    database_id = extract_notion_id(database_url)
    return database_id

215 

216 

def extract_page_id(page_url: str) -> Optional[str]:
    """Return the page ID found in a Notion URL, or the input normalized if it is an ID.

    Thin alias of `extract_notion_id`, provided for readability in code that
    deals with pages.

    Returns the UUID in standard format (with hyphens) or None if invalid.
    """
    page_id = extract_notion_id(page_url)
    return page_id

225 

226 

def extract_block_id(url_or_id: str) -> Optional[str]:
    """Return the block ID carried in a URL fragment, or fall back to `extract_notion_id`.

    A fragment of the form `#<id>` or `#block-<id>` is searched for, where
    `<id>` is a hyphenated UUID or 32 hex characters. When no such fragment
    is found the input is handed to `extract_notion_id` instead.

    Returns the UUID in standard format (lowercase, with hyphens), or None
    when the input is empty, not a string, or yields no ID.
    """
    if not (url_or_id and isinstance(url_or_id, str)):
        return None

    fragment_pattern = r"#(?:block-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}|[0-9a-f]{32})"
    found = re.search(fragment_pattern, url_or_id, re.IGNORECASE)
    if found is None:
        # No block fragment: use the general ID extraction.
        return extract_notion_id(url_or_id)

    block_id = found.group(1).lower()
    if "-" in block_id:
        # Already hyphenated.
        return block_id
    return _format_uuid(block_id)