"""
CDP-based Xiaohongshu publisher.
Connects to a Chrome instance via Chrome DevTools Protocol to automate
publishing articles on Xiaohongshu (RED) creator center.
CLI usage:
# Basic commands (image-text mode)
python cdp_publish.py check-login [--headless] [--account NAME]
python cdp_publish.py fill --title "标题" --content "正文" --images img1.jpg [--headless] [--account NAME]
python cdp_publish.py publish --title "标题" --content "正文" --images img1.jpg [--headless] [--account NAME]
python cdp_publish.py click-publish [--headless] [--account NAME]
# Long article mode
python cdp_publish.py long-article --title "标题" --content "正文" [--images img1.jpg] [--account NAME]
python cdp_publish.py click-next-step [--account NAME]
# Account management
python cdp_publish.py login [--account NAME] # open browser for QR login
python cdp_publish.py re-login [--account NAME] # clear cookies and re-login same account
python cdp_publish.py switch-account [--account NAME] # clear cookies + open login for new account
python cdp_publish.py list-accounts # list all configured accounts
python cdp_publish.py add-account NAME [--alias ALIAS] # add a new account
python cdp_publish.py remove-account NAME # remove an account
Library usage:
from cdp_publish import XiaohongshuPublisher
publisher = XiaohongshuPublisher()
publisher.connect()
publisher.check_login()
publisher.publish(
title="Article title",
content="Article body text",
image_paths=["/path/to/img1.jpg", "/path/to/img2.jpg"],
)
"""
import json
import os
import time
import sys
from typing import Any
# Ensure UTF-8 output on Windows consoles.
# setdefault() only sets PYTHONIOENCODING when the variable is not already
# present, so an explicit user setting is left untouched.
if sys.platform == "win32":
    os.environ.setdefault("PYTHONIOENCODING", "utf-8")
    try:
        # errors="replace" substitutes characters the stream cannot encode
        # instead of raising while printing.
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")
        sys.stderr.reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Best-effort: any failure to reconfigure the streams is ignored.
        pass
import requests
import websockets.sync.client as ws_client
# ---------------------------------------------------------------------------
# Configuration - centralised selectors and URLs for easy maintenance
# ---------------------------------------------------------------------------
# Default host/port of Chrome's DevTools HTTP endpoint; used as the default
# constructor arguments of XiaohongshuPublisher.
CDP_HOST = "127.0.0.1"
CDP_PORT = 9222
# Xiaohongshu URLs
# Opened in the tab created through the /json/new endpoint.
XHS_CREATOR_URL = "https://creator.xiaohongshu.com/publish/publish"
XHS_HOME_URL = "https://www.xiaohongshu.com"
# Loaded by check_login()/open_login_page(); the resulting URL is then
# inspected for the substring "login".
XHS_LOGIN_CHECK_URL = "https://creator.xiaohongshu.com"
# DOM selectors (update these when Xiaohongshu changes their page structure)
# Last verified: 2026-02
# Keys ending in "_text" hold visible UI labels that are compared against
# element text; keys ending in "_alt" are fallbacks tried after the primary
# selector of the same name.
SELECTORS = {
    # "上传图文" (upload image-text) tab - must click before uploading images
    "image_text_tab": "div.creator-tab",
    "image_text_tab_text": "上传图文",
    # Upload area - the file input element for images (visible after clicking tab)
    "upload_input": "input.upload-input",
    "upload_input_alt": 'input[type="file"]',
    # Title input field (visible after image upload)
    "title_input": 'input[placeholder*="填写标题"]',
    "title_input_alt": "input.d-text",
    # Content editor area - TipTap/ProseMirror contenteditable div
    "content_editor": "div.tiptap.ProseMirror",
    "content_editor_alt": 'div.ProseMirror[contenteditable="true"]',
    # Publish button
    "publish_button_text": "发布",
    # Login indicator - CSS selector for logged-in page elements.
    # NOTE(review): check_login() decides by URL (presence of "login" after
    # navigation) and does not read this entry - confirm whether it is still
    # used elsewhere.
    "login_indicator": '.user-info, .creator-header, [class*="user"]',
    # Long article mode
    "long_article_tab_text": "写长文",
    "new_creation_btn_text": "新的创作",
    "long_title_input": 'textarea.d-text[placeholder="输入标题"]',
    "auto_format_btn_text": "一键排版",
    "next_step_btn_text": "下一步",
    "template_card": ".template-card",
}
# Timing (all values are in seconds and are used as fixed sleeps/limits)
PAGE_LOAD_WAIT = 3  # seconds to wait after navigation
TAB_CLICK_WAIT = 2  # seconds to wait after clicking tab
UPLOAD_WAIT = 6  # seconds to wait after image upload for editor to appear
ACTION_INTERVAL = 1  # seconds between actions
AUTO_FORMAT_WAIT = 5  # seconds to wait after clicking auto-format
TEMPLATE_WAIT = 10  # seconds max to wait for template cards to appear
class CDPError(Exception):
    """Raised when talking to Chrome over CDP, or driving the page through it, fails."""
class XiaohongshuPublisher:
"""Automates publishing to Xiaohongshu via CDP."""
def __init__(self, host: str = CDP_HOST, port: int = CDP_PORT):
self.host = host
self.port = port
self.ws = None
self._msg_id = 0
# ------------------------------------------------------------------
# CDP connection management
# ------------------------------------------------------------------
def _get_targets(self) -> list[dict]:
    """Return the targets listed by Chrome's ``/json`` HTTP endpoint.

    Up to two attempts are made. If the first one fails for any reason,
    ``chrome_launcher.ensure_chrome(self.port)`` is called, the method sleeps
    for two seconds and the request is repeated.

    Returns:
        The decoded JSON body of the ``/json`` response.

    Raises:
        CDPError: If the second attempt fails as well. The underlying
            exception is attached as ``__cause__``.
    """
    url = f"http://{self.host}:{self.port}/json"
    for attempt in range(2):
        try:
            resp = requests.get(url, timeout=5)
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            if attempt == 0:
                print(f"[cdp_publish] CDP connection failed ({e}), restarting Chrome...")
                # Imported here so the launcher is only loaded on the recovery path.
                from chrome_launcher import ensure_chrome
                ensure_chrome(self.port)
                time.sleep(2)
            else:
                # Chain explicitly so the traceback shows the request failure
                # as the direct cause rather than an incidental context.
                raise CDPError(
                    f"Cannot reach Chrome on {self.host}:{self.port}: {e}"
                ) from e
def _find_or_create_tab(self, target_url_prefix: str = "") -> str:
    """Return the WebSocket debugger URL of the tab to attach to.

    Resolution order:
      1. An existing page tab whose URL starts with ``target_url_prefix``
         (skipped when the prefix is empty).
      2. A new tab opened on ``XHS_CREATOR_URL`` through ``/json/new``.
      3. The first existing page tab, if creating a tab failed or the
         response carried no debugger URL.

    Page targets without a ``webSocketDebuggerUrl`` are ignored, since they
    cannot be attached to.

    Args:
        target_url_prefix: URL prefix of a tab to reuse, or "" to skip reuse.

    Returns:
        A non-empty WebSocket debugger URL.

    Raises:
        CDPError: If no tab could be found or created (also propagated from
            ``_get_targets`` when Chrome is unreachable).
    """
    targets = self._get_targets()
    pages = [
        t for t in targets
        if t.get("type") == "page" and t.get("webSocketDebuggerUrl")
    ]
    if target_url_prefix:
        for page in pages:
            if page.get("url", "").startswith(target_url_prefix):
                return page["webSocketDebuggerUrl"]
    # Create a new tab
    try:
        resp = requests.put(
            f"http://{self.host}:{self.port}/json/new?{XHS_CREATOR_URL}",
            timeout=5,
        )
        if resp.ok:
            ws_url = resp.json().get("webSocketDebuggerUrl", "")
            if ws_url:
                return ws_url
    except (requests.RequestException, ValueError) as e:
        # A failed request or undecodable body must not prevent the fallback below.
        print(f"[cdp_publish] Could not create a new tab ({e}), falling back to an existing tab.")
    # Fallback: use first available page
    if pages:
        return pages[0]["webSocketDebuggerUrl"]
    raise CDPError("No browser tabs available.")
def connect(self, target_url_prefix: str = ""):
    """Attach to a Chrome tab by opening a WebSocket to its debugger URL.

    Args:
        target_url_prefix: Forwarded to ``_find_or_create_tab`` to select
            the tab.

    Raises:
        CDPError: If no WebSocket URL could be determined.
    """
    endpoint = self._find_or_create_tab(target_url_prefix)
    if endpoint:
        print(f"[cdp_publish] Connecting to {endpoint}")
        self.ws = ws_client.connect(endpoint)
        print("[cdp_publish] Connected to Chrome tab.")
        return
    raise CDPError("Could not obtain WebSocket URL for any tab.")
def disconnect(self):
"""Close the WebSocket connection."""
if self.ws:
self.ws.close()
self.ws = None
# ------------------------------------------------------------------
# CDP command helpers
# ------------------------------------------------------------------
def _send(self, method: str, params: dict | None = None) -> dict:
"""Send a CDP command and return the result."""
if not self.ws:
raise CDPError("Not connected. Call connect() first.")
self._msg_id += 1
msg = {"id": self._msg_id, "method": method}
if params:
msg["params"] = params
self.ws.send(json.dumps(msg))
# Wait for the matching response
while True:
raw = self.ws.recv()
data = json.loads(raw)
if data.get("id") == self._msg_id:
if "error" in data:
raise CDPError(f"CDP error: {data['error']}")
return data.get("result", {})
# else: it's an event, skip it
def _evaluate(self, expression: str) -> Any:
"""Execute JavaScript in the page and return the result value."""
result = self._send("Runtime.evaluate", {
"expression": expression,
"returnByValue": True,
"awaitPromise": True,
})
remote_obj = result.get("result", {})
if remote_obj.get("subtype") == "error":
raise CDPError(f"JS error: {remote_obj.get('description', remote_obj)}")
return remote_obj.get("value")
def _navigate(self, url: str):
    """Point the attached tab at ``url`` and pause for the page to load.

    The wait is a fixed ``PAGE_LOAD_WAIT`` sleep; no load event is awaited.
    """
    print(f"[cdp_publish] Navigating to {url}")
    commands = (
        ("Page.enable", None),
        ("Page.navigate", {"url": url}),
    )
    for cdp_method, cdp_params in commands:
        self._send(cdp_method, cdp_params)
    time.sleep(PAGE_LOAD_WAIT)
# ------------------------------------------------------------------
# Login check
# ------------------------------------------------------------------
def check_login(self) -> bool:
    """Report whether the browser session is logged in to the creator center.

    Loads ``XHS_LOGIN_CHECK_URL``, waits, and inspects the resulting page
    URL: if it contains "login" (case-insensitive) the session is treated as
    logged out and instructions are printed.

    Returns:
        True when logged in, False otherwise.
    """
    self._navigate(XHS_LOGIN_CHECK_URL)
    time.sleep(2)
    # A logged-out session is detected by the URL we ended up on.
    landed_on = self._evaluate("window.location.href")
    print(f"[cdp_publish] Current URL: {landed_on}")
    logged_in = "login" not in landed_on.lower()
    if logged_in:
        print("[cdp_publish] Login confirmed.")
    else:
        print(
            "\n[cdp_publish] NOT LOGGED IN.\n"
            " Please scan the QR code in the Chrome window to log in,\n"
            " then run this script again.\n"
        )
    return logged_in
def clear_cookies(self, domain: str = ".xiaohongshu.com"):
"""
Clear all cookies for the given domain to force re-login.
Used when switching accounts.
"""
print(f"[cdp_publish] Clearing cookies for {domain}...")
self._send("Network.enable")
self._send("Network.clearBrowserCookies")
# Also clear storage
self._send("Storage.clearDataForOrigin", {
"origin": "https://www.xiaohongshu.com",
"storageTypes": "cookies,local_storage,session_storage",
})
self._send("Storage.clearDataForOrigin", {
"origin": "https://creator.xiaohongshu.com",
"storageTypes": "cookies,local_storage,session_storage",
})
print("[cdp_publish] Cookies and storage cleared.")
def open_login_page(self):
    """Bring up the Xiaohongshu login page so the user can scan the QR code.

    Loads ``XHS_LOGIN_CHECK_URL`` first; if the resulting URL does not
    contain "login", the creator-center login URL is opened explicitly.
    Used for the initial login and after cookies were cleared for an
    account switch.
    """
    self._navigate(XHS_LOGIN_CHECK_URL)
    time.sleep(2)
    landed_on = self._evaluate("window.location.href")
    on_login_page = "login" in landed_on.lower()
    if not on_login_page:
        # Session is still valid, so no redirect happened: go there directly.
        self._navigate("https://creator.xiaohongshu.com/login")
        time.sleep(2)
    print(
        "\n[cdp_publish] Login page is open.\n"
        " Please scan the QR code in the Chrome window to log in.\n"
    )
# ------------------------------------------------------------------
# Publishing actions
# ------------------------------------------------------------------
def _click_image_text_tab(self):
    """Click the '上传图文' tab to switch to image+text publish mode.

    Looks at every element matching ``SELECTORS["image_text_tab"]`` and
    clicks the first one whose trimmed text equals
    ``SELECTORS["image_text_tab_text"]``, then sleeps ``TAB_CLICK_WAIT``.

    Raises:
        CDPError: If no matching tab element is found.
    """
    print("[cdp_publish] Clicking '上传图文' tab...")
    tab_text = SELECTORS["image_text_tab_text"]
    selector = SELECTORS["image_text_tab"]
    # Emit both values as JSON string literals (valid JavaScript) so quotes or
    # backslashes in a configured selector/label cannot break the script.
    js_selector = json.dumps(selector)
    js_tab_text = json.dumps(tab_text)
    clicked = self._evaluate(f"""
        (function() {{
            var tabs = document.querySelectorAll({js_selector});
            for (var i = 0; i < tabs.length; i++) {{
                if (tabs[i].textContent.trim() === {js_tab_text}) {{
                    tabs[i].click();
                    return true;
                }}
            }}
            return false;
        }})();
    """)
    if not clicked:
        raise CDPError(
            f"Could not find '{tab_text}' tab. "
            "The page structure may have changed."
        )
    print("[cdp_publish] Tab clicked, waiting for upload area...")
    time.sleep(TAB_CLICK_WAIT)
def _upload_images(self, image_paths: list[str]):
    """Attach image files to the page's file input via CDP.

    The input is located with ``SELECTORS["upload_input"]``, falling back to
    ``SELECTORS["upload_input_alt"]``, and the files are set with
    ``DOM.setFileInputFiles``. Afterwards the method sleeps ``UPLOAD_WAIT``.

    Args:
        image_paths: Paths of the images to upload. Relative paths are
            resolved against the current working directory. An empty list
            is a no-op.

    Raises:
        CDPError: If no file input element is found.
    """
    if not image_paths:
        print("[cdp_publish] No images to upload, skipping.")
        return
    # The paths are handed to the browser process as-is, and it does not share
    # this script's working directory, so make relative paths absolute first.
    # Then normalize to forward slashes for CDP.
    normalized = [
        (p if os.path.isabs(p) else os.path.abspath(p)).replace("\\", "/")
        for p in image_paths
    ]
    print(f"[cdp_publish] Uploading {len(image_paths)} image(s)...")
    # Enable DOM domain
    self._send("DOM.enable")
    # Get the document root
    doc = self._send("DOM.getDocument")
    root_id = doc["root"]["nodeId"]
    # Try primary selector, then fallback (nodeId 0 means "not found")
    node_id = 0
    for selector in (SELECTORS["upload_input"], SELECTORS["upload_input_alt"]):
        result = self._send("DOM.querySelector", {
            "nodeId": root_id,
            "selector": selector,
        })
        node_id = result.get("nodeId", 0)
        if node_id:
            break
    if not node_id:
        raise CDPError(
            "Could not find file input element.\n"
            "The page structure may have changed. Check references/publish-workflow.md."
        )
    # Use DOM.setFileInputFiles to set the files
    self._send("DOM.setFileInputFiles", {
        "nodeId": node_id,
        "files": normalized,
    })
    print("[cdp_publish] Images uploaded. Waiting for editor to appear...")
    time.sleep(UPLOAD_WAIT)
def _fill_title(self, title: str):
    """Fill in the article title.

    Tries ``SELECTORS["title_input"]`` and then
    ``SELECTORS["title_input_alt"]``. For the first selector that matches,
    the value is written through the native ``HTMLInputElement`` value
    setter and ``input``/``change`` events are dispatched so the page's
    framework notices the change.

    Args:
        title: Title text to set.

    Raises:
        CDPError: If neither selector matches an element.
    """
    print(f"[cdp_publish] Setting title: {title[:40]}...")
    time.sleep(ACTION_INTERVAL)
    # JSON string literals are valid JavaScript, so json.dumps safely escapes
    # both the title and the selectors.
    escaped_title = json.dumps(title)
    for selector in (SELECTORS["title_input"], SELECTORS["title_input_alt"]):
        js_selector = json.dumps(selector)
        # One round trip per selector: the script reports whether it found
        # (and filled) the element, so it cannot vanish between a separate
        # existence check and the write.
        was_set = self._evaluate(f"""
            (function() {{
                var el = document.querySelector({js_selector});
                if (!el) {{
                    return false;
                }}
                var nativeSetter = Object.getOwnPropertyDescriptor(
                    window.HTMLInputElement.prototype, 'value'
                ).set;
                el.focus();
                nativeSetter.call(el, {escaped_title});
                el.dispatchEvent(new Event('input', {{ bubbles: true }}));
                el.dispatchEvent(new Event('change', {{ bubbles: true }}));
                return true;
            }})();
        """)
        if was_set:
            print("[cdp_publish] Title set.")
            return
    raise CDPError("Could not find title input element.")
def _fill_content(self, content: str):
"""Fill in the article body content using the TipTap/ProseMirror editor."""
print(f"[cdp_publish] Setting content ({len(content)} chars)...")
time.sleep(ACTION_INTERVAL)
for selector in (SELECTORS["content_editor"], SELECTORS["content_editor_alt"]):
found = self._evaluate(f"!!document.querySelector('{selector}')")
if found:
escaped = json.dumps(content)
self._evaluate(f"""
(function() {{
var el = document.querySelector('{selector}');
el.focus();
var text = {escaped};
var paragraphs = text.split('\\n').filter(function(p) {{ return p.trim(); }});
var html = [];
for (var i = 0; i < paragraphs.length; i++) {{
html.push('