# Define your Notion API token and database ID.
# NOTE: the values below are placeholders; replace them before running and
# avoid committing a real integration token to version control.
NOTION_API_TOKEN = 'your_notion_api_token_here'
DATABASE_ID_ITEMS = 'your_items_database_id_here'  # ID of your ITEMS database
DATABASE_ID_SOURCES = 'your_sources_database_id_here'  # ID of your SOURCES database

# Set the Notion API headers (sent with every request made by this script).
headers = {
    "Authorization": f"Bearer {NOTION_API_TOKEN}",
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28",
}
# Helper function to make Notion API requests
def notion_request(url, params=None, *, method="POST", timeout=30):
    """Send a request to the Notion API and return the decoded JSON body.

    Args:
        url: Full Notion API endpoint URL.
        params: Optional JSON-serialisable payload. Sent as the JSON body,
            or as query-string parameters when ``method`` is ``"GET"``.
        method: HTTP verb to use. Defaults to ``"POST"``, which is what the
            existing callers rely on.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    verb = method.upper()
    # GET requests carry no body, so their arguments travel in the query string.
    payload = {"params": params} if verb == "GET" else {"json": params}
    response = requests.request(
        verb, url, headers=headers, timeout=timeout, **payload
    )
    response.raise_for_status()  # Raise an exception for any failed request
    return response.json()
# Helper function to extract text from a block's rich_text
def extract_text_from_block(block):
    """Return the concatenated text content of a block's ``rich_text`` list.

    Args:
        block: A block dictionary with a ``type`` key and a same-named key
            holding that type's payload (e.g. ``block['paragraph']``).

    Returns:
        The joined ``text.content`` of every rich-text fragment that has a
        ``text`` entry, with HTML entities decoded. Block types other than
        paragraph, bulleted_list_item and toggle yield an empty string.
    """
    block_type = block.get('type', '')

    # Identify the correct field to extract text from. All supported types
    # keep their rich text under a key named after the block type.
    if block_type in ('paragraph', 'bulleted_list_item', 'toggle'):
        rich_text_field = block.get(block_type, {}).get('rich_text', [])
    else:
        # Unsupported type: nothing to extract (avoids an unbound variable).
        rich_text_field = []

    # Extract the content from the rich_text field, skipping fragments that
    # carry no 'text' entry.
    text = ''.join(t['text']['content'] for t in rich_text_field if 'text' in t)

    # Decode HTML entities (e.g. &quot; for quotes)
    return html.unescape(text)
# Function to process a toggle and its children
def process_toggle_block(block):
    """Flatten a toggle block and its nested children into a list of strings.

    Args:
        block: A block dictionary of type ``toggle``.

    Returns:
        A list whose first entry is ``"Toggle Heading: <text>"`` followed by
        the flattened text of every child found under
        ``block['toggle']['children']``.
    """
    toggle_heading_text = extract_text_from_block(block)
    blocks = [f"Toggle Heading: {toggle_heading_text}"]

    # Process child blocks under the toggle.
    # NOTE(review): this only sees children that are already embedded in the
    # block dictionary. Notion API responses appear to flag nested content via
    # `has_children` and require a separate fetch -- confirm against the API.
    for child_block in block.get('toggle', {}).get('children', []):
        blocks.extend(process_block(child_block))  # Recurse for child blocks

    return blocks
# Function to process any block
def process_block(block):
    """Convert a single block into a list of text strings.

    Args:
        block: A block dictionary with a ``type`` key.

    Returns:
        A one-element list for paragraph and bulleted-list blocks, the
        flattened heading and children for toggle blocks, and an empty list
        for any other block type, so callers can always ``extend`` with the
        result.
    """
    block_type = block.get('type', '')

    if block_type in ('paragraph', 'bulleted_list_item'):
        # For paragraph or bulleted list, extract text using helper function
        return [extract_text_from_block(block)]

    if block_type == 'toggle':
        # Process toggle blocks and their children
        return process_toggle_block(block)

    # Add more block types if needed
    return []
# Loop through all blocks and get text content from them
def get_text_blocks_from_page(data):
    """Collect the text of every block listed under ``data['results']``.

    Args:
        data: A dictionary with an optional ``results`` list of block
            dictionaries.

    Returns:
        A flat list of text strings in block order; empty when there are no
        results.
    """
    blocks = []
    for block in data.get('results', []):
        # Process the block and append the result
        blocks.extend(process_block(block))
    return blocks
def _first_text_content(fragments):
    """Return ``text.content`` of the first rich-text fragment, or None.

    Handles both a missing property and a present-but-empty list, so an
    untitled row cannot raise IndexError.
    """
    if not fragments:
        return None
    return fragments[0].get('text', {}).get('content')


# Fetch all items from the ITEMS database
def fetch_items():
    """Query the ITEMS database and return its rows as simple dictionaries.

    Follows ``has_more`` / ``next_cursor`` so that rows beyond the first
    response page are included.

    Returns:
        A list of ``{"name": ..., "page_id": ...}`` dictionaries; either value
        is None when the corresponding property is missing or empty.
    """
    url = f"https://api.notion.com/v1/databases/{DATABASE_ID_ITEMS}/query"
    items = []
    cursor = None
    while True:
        data = notion_request(url, {"start_cursor": cursor} if cursor else None)
        for result in data.get('results', []):
            properties = result['properties']
            items.append({
                "name": _first_text_content(
                    properties.get('Name', {}).get('title')
                ),
                "page_id": _first_text_content(
                    properties.get('Page_ID', {}).get('rich_text')
                ),
            })
        cursor = data.get('next_cursor')
        if not data.get('has_more') or not cursor:
            break
    return items


def _fetch_block_children(page_id):
    """Retrieve every child block of a page as ``{"results": [...]}``.

    The Notion API documents this endpoint as a GET request, so it is issued
    directly here instead of via the POST-based ``notion_request`` helper.
    Pages are followed through ``has_more`` / ``next_cursor``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = f"https://api.notion.com/v1/blocks/{page_id}/children"
    results = []
    query = {}
    while True:
        response = requests.get(url, headers=headers, params=query, timeout=30)
        response.raise_for_status()
        payload = response.json()
        results.extend(payload.get('results', []))
        next_cursor = payload.get('next_cursor')
        if not payload.get('has_more') or not next_cursor:
            break
        query = {"start_cursor": next_cursor}
    return {"results": results}


# Combine the data and flatten it
def combine_data():
    """Build one record per database item containing its page's text blocks.

    Returns:
        A list of ``{"item_name": ..., "text_blocks": [...]}`` dictionaries.
        Items without a page ID are kept with an empty ``text_blocks`` list.
    """
    all_data = []
    for item in fetch_items():
        page_id = item.get("page_id")
        text_blocks = []
        if page_id:
            # Without an ID there is no page to read; requesting
            # ".../blocks/None/children" would only fail.
            page_data = _fetch_block_children(page_id)
            text_blocks = get_text_blocks_from_page(page_data)
        item_data = {
            "item_name": item['name'],
            "text_blocks": text_blocks
        }
        all_data.append(item_data)
    return all_data


# Write the flattened data into a JSON file
def save_to_json():
    """Fetch, flatten and write all item data to notion_items_text_blocks.json."""
    all_data = combine_data()
    with open('notion_items_text_blocks.json', 'w', encoding='utf-8') as outfile:
        json.dump(all_data, outfile, indent=4, ensure_ascii=False)


# Run the function to save the data
if __name__ == "__main__":
    save_to_json()