""" CDP-based Xiaohongshu publisher. Connects to a Chrome instance via Chrome DevTools Protocol to automate publishing articles on Xiaohongshu (RED) creator center. CLI usage: # Basic commands (image-text mode) python cdp_publish.py check-login [--headless] [--account NAME] python cdp_publish.py fill --title "标题" --content "正文" --images img1.jpg [--headless] [--account NAME] python cdp_publish.py publish --title "标题" --content "正文" --images img1.jpg [--headless] [--account NAME] python cdp_publish.py click-publish [--headless] [--account NAME] # Long article mode python cdp_publish.py long-article --title "标题" --content "正文" [--images img1.jpg] [--account NAME] python cdp_publish.py click-next-step [--account NAME] # Account management python cdp_publish.py login [--account NAME] # open browser for QR login python cdp_publish.py re-login [--account NAME] # clear cookies and re-login same account python cdp_publish.py switch-account [--account NAME] # clear cookies + open login for new account python cdp_publish.py list-accounts # list all configured accounts python cdp_publish.py add-account NAME [--alias ALIAS] # add a new account python cdp_publish.py remove-account NAME # remove an account Library usage: from cdp_publish import XiaohongshuPublisher publisher = XiaohongshuPublisher() publisher.connect() publisher.check_login() publisher.publish( title="Article title", content="Article body text", image_paths=["/path/to/img1.jpg", "/path/to/img2.jpg"], ) """ import json import os import time import sys from typing import Any # Ensure UTF-8 output on Windows consoles if sys.platform == "win32": os.environ.setdefault("PYTHONIOENCODING", "utf-8") try: sys.stdout.reconfigure(encoding="utf-8", errors="replace") sys.stderr.reconfigure(encoding="utf-8", errors="replace") except Exception: pass import requests import websockets.sync.client as ws_client # --------------------------------------------------------------------------- # Configuration - centralised selectors and URLs for easy maintenance # --------------------------------------------------------------------------- CDP_HOST = "127.0.0.1" CDP_PORT = 9222 # Xiaohongshu URLs XHS_CREATOR_URL = "https://creator.xiaohongshu.com/publish/publish" XHS_HOME_URL = "https://www.xiaohongshu.com" XHS_LOGIN_CHECK_URL = "https://creator.xiaohongshu.com" # DOM selectors (update these when Xiaohongshu changes their page structure) # Last verified: 2026-02 SELECTORS = { # "上传图文" tab - must click before uploading images "image_text_tab": "div.creator-tab", "image_text_tab_text": "上传图文", # Upload area - the file input element for images (visible after clicking tab) "upload_input": "input.upload-input", "upload_input_alt": 'input[type="file"]', # Title input field (visible after image upload) "title_input": 'input[placeholder*="填写标题"]', "title_input_alt": "input.d-text", # Content editor area - TipTap/ProseMirror contenteditable div "content_editor": "div.tiptap.ProseMirror", "content_editor_alt": 'div.ProseMirror[contenteditable="true"]', # Publish button "publish_button_text": "发布", # Login indicator - URL-based check (redirect to /login if not logged in) "login_indicator": '.user-info, .creator-header, [class*="user"]', # Long article mode "long_article_tab_text": "写长文", "new_creation_btn_text": "新的创作", "long_title_input": 'textarea.d-text[placeholder="输入标题"]', "auto_format_btn_text": "一键排版", "next_step_btn_text": "下一步", "template_card": ".template-card", } # Timing PAGE_LOAD_WAIT = 3 # seconds to wait after navigation TAB_CLICK_WAIT = 2 # seconds to wait after clicking tab UPLOAD_WAIT = 6 # seconds to wait after image upload for editor to appear ACTION_INTERVAL = 1 # seconds between actions AUTO_FORMAT_WAIT = 5 # seconds to wait after clicking auto-format TEMPLATE_WAIT = 10 # seconds max to wait for template cards to appear class CDPError(Exception): """Error communicating with Chrome via CDP.""" class XiaohongshuPublisher: """Automates publishing to Xiaohongshu via CDP.""" def __init__(self, host: str = CDP_HOST, port: int = CDP_PORT): self.host = host self.port = port self.ws = None self._msg_id = 0 # ------------------------------------------------------------------ # CDP connection management # ------------------------------------------------------------------ def _get_targets(self) -> list[dict]: """Get list of available browser targets (tabs). Retries once on failure.""" url = f"http://{self.host}:{self.port}/json" for attempt in range(2): try: resp = requests.get(url, timeout=5) resp.raise_for_status() return resp.json() except Exception as e: if attempt == 0: print(f"[cdp_publish] CDP connection failed ({e}), restarting Chrome...") from chrome_launcher import ensure_chrome ensure_chrome(self.port) time.sleep(2) else: raise CDPError(f"Cannot reach Chrome on {self.host}:{self.port}: {e}") def _find_or_create_tab(self, target_url_prefix: str = "") -> str: """Find an existing tab matching the URL prefix, or return the first page tab.""" targets = self._get_targets() pages = [t for t in targets if t.get("type") == "page"] if target_url_prefix: for t in pages: if t.get("url", "").startswith(target_url_prefix): return t["webSocketDebuggerUrl"] # Create a new tab resp = requests.put( f"http://{self.host}:{self.port}/json/new?{XHS_CREATOR_URL}", timeout=5, ) if resp.ok: return resp.json().get("webSocketDebuggerUrl", "") # Fallback: use first available page if pages: return pages[0]["webSocketDebuggerUrl"] raise CDPError("No browser tabs available.") def connect(self, target_url_prefix: str = ""): """Connect to a Chrome tab via WebSocket.""" ws_url = self._find_or_create_tab(target_url_prefix) if not ws_url: raise CDPError("Could not obtain WebSocket URL for any tab.") print(f"[cdp_publish] Connecting to {ws_url}") self.ws = ws_client.connect(ws_url) print("[cdp_publish] Connected to Chrome tab.") def disconnect(self): """Close the WebSocket connection.""" if self.ws: self.ws.close() self.ws = None # ------------------------------------------------------------------ # CDP command helpers # ------------------------------------------------------------------ def _send(self, method: str, params: dict | None = None) -> dict: """Send a CDP command and return the result.""" if not self.ws: raise CDPError("Not connected. Call connect() first.") self._msg_id += 1 msg = {"id": self._msg_id, "method": method} if params: msg["params"] = params self.ws.send(json.dumps(msg)) # Wait for the matching response while True: raw = self.ws.recv() data = json.loads(raw) if data.get("id") == self._msg_id: if "error" in data: raise CDPError(f"CDP error: {data['error']}") return data.get("result", {}) # else: it's an event, skip it def _evaluate(self, expression: str) -> Any: """Execute JavaScript in the page and return the result value.""" result = self._send("Runtime.evaluate", { "expression": expression, "returnByValue": True, "awaitPromise": True, }) remote_obj = result.get("result", {}) if remote_obj.get("subtype") == "error": raise CDPError(f"JS error: {remote_obj.get('description', remote_obj)}") return remote_obj.get("value") def _navigate(self, url: str): """Navigate the current tab to the given URL and wait for load.""" print(f"[cdp_publish] Navigating to {url}") self._send("Page.enable") self._send("Page.navigate", {"url": url}) time.sleep(PAGE_LOAD_WAIT) # ------------------------------------------------------------------ # Login check # ------------------------------------------------------------------ def check_login(self) -> bool: """ Navigate to Xiaohongshu creator center and check if the user is logged in. Returns True if logged in. If not logged in, prints instructions and returns False. """ self._navigate(XHS_LOGIN_CHECK_URL) time.sleep(2) # Check if we got redirected to a login page current_url = self._evaluate("window.location.href") print(f"[cdp_publish] Current URL: {current_url}") if "login" in current_url.lower(): print( "\n[cdp_publish] NOT LOGGED IN.\n" " Please scan the QR code in the Chrome window to log in,\n" " then run this script again.\n" ) return False print("[cdp_publish] Login confirmed.") return True def clear_cookies(self, domain: str = ".xiaohongshu.com"): """ Clear all cookies for the given domain to force re-login. Used when switching accounts. """ print(f"[cdp_publish] Clearing cookies for {domain}...") self._send("Network.enable") self._send("Network.clearBrowserCookies") # Also clear storage self._send("Storage.clearDataForOrigin", { "origin": "https://www.xiaohongshu.com", "storageTypes": "cookies,local_storage,session_storage", }) self._send("Storage.clearDataForOrigin", { "origin": "https://creator.xiaohongshu.com", "storageTypes": "cookies,local_storage,session_storage", }) print("[cdp_publish] Cookies and storage cleared.") def open_login_page(self): """ Navigate to the Xiaohongshu login page for QR code scanning. Used for initial login or after clearing cookies for account switch. """ self._navigate(XHS_LOGIN_CHECK_URL) time.sleep(2) current_url = self._evaluate("window.location.href") if "login" not in current_url.lower(): # Already logged in, navigate to login page explicitly self._navigate("https://creator.xiaohongshu.com/login") time.sleep(2) print( "\n[cdp_publish] Login page is open.\n" " Please scan the QR code in the Chrome window to log in.\n" ) # ------------------------------------------------------------------ # Publishing actions # ------------------------------------------------------------------ def _click_image_text_tab(self): """Click the '上传图文' tab to switch to image+text publish mode.""" print("[cdp_publish] Clicking '上传图文' tab...") tab_text = SELECTORS["image_text_tab_text"] selector = SELECTORS["image_text_tab"] clicked = self._evaluate(f""" (function() {{ var tabs = document.querySelectorAll('{selector}'); for (var i = 0; i < tabs.length; i++) {{ if (tabs[i].textContent.trim() === '{tab_text}') {{ tabs[i].click(); return true; }} }} return false; }})(); """) if not clicked: raise CDPError( f"Could not find '{tab_text}' tab. " "The page structure may have changed." ) print("[cdp_publish] Tab clicked, waiting for upload area...") time.sleep(TAB_CLICK_WAIT) def _upload_images(self, image_paths: list[str]): """Upload images via the file input element.""" if not image_paths: print("[cdp_publish] No images to upload, skipping.") return # Normalize paths (forward slashes for CDP) normalized = [p.replace("\\", "/") for p in image_paths] print(f"[cdp_publish] Uploading {len(image_paths)} image(s)...") # Enable DOM domain self._send("DOM.enable") # Get the document root doc = self._send("DOM.getDocument") root_id = doc["root"]["nodeId"] # Try primary selector, then fallback node_id = 0 for selector in (SELECTORS["upload_input"], SELECTORS["upload_input_alt"]): result = self._send("DOM.querySelector", { "nodeId": root_id, "selector": selector, }) node_id = result.get("nodeId", 0) if node_id: break if not node_id: raise CDPError( "Could not find file input element.\n" "The page structure may have changed. Check references/publish-workflow.md." ) # Use DOM.setFileInputFiles to set the files self._send("DOM.setFileInputFiles", { "nodeId": node_id, "files": normalized, }) print("[cdp_publish] Images uploaded. Waiting for editor to appear...") time.sleep(UPLOAD_WAIT) def _fill_title(self, title: str): """Fill in the article title.""" print(f"[cdp_publish] Setting title: {title[:40]}...") time.sleep(ACTION_INTERVAL) for selector in (SELECTORS["title_input"], SELECTORS["title_input_alt"]): found = self._evaluate(f"!!document.querySelector('{selector}')") if found: escaped_title = json.dumps(title) self._evaluate(f""" (function() {{ var el = document.querySelector('{selector}'); var nativeSetter = Object.getOwnPropertyDescriptor( window.HTMLInputElement.prototype, 'value' ).set; el.focus(); nativeSetter.call(el, {escaped_title}); el.dispatchEvent(new Event('input', {{ bubbles: true }})); el.dispatchEvent(new Event('change', {{ bubbles: true }})); }})(); """) print("[cdp_publish] Title set.") return raise CDPError("Could not find title input element.") def _fill_content(self, content: str): """Fill in the article body content using the TipTap/ProseMirror editor.""" print(f"[cdp_publish] Setting content ({len(content)} chars)...") time.sleep(ACTION_INTERVAL) for selector in (SELECTORS["content_editor"], SELECTORS["content_editor_alt"]): found = self._evaluate(f"!!document.querySelector('{selector}')") if found: escaped = json.dumps(content) self._evaluate(f""" (function() {{ var el = document.querySelector('{selector}'); el.focus(); var text = {escaped}; var paragraphs = text.split('\\n').filter(function(p) {{ return p.trim(); }}); var html = []; for (var i = 0; i < paragraphs.length; i++) {{ html.push('

' + paragraphs[i] + '

'); if (i < paragraphs.length - 1) {{ html.push('


'); }} }} el.innerHTML = html.join(''); el.dispatchEvent(new Event('input', {{ bubbles: true }})); }})(); """) print("[cdp_publish] Content set.") return raise CDPError("Could not find content editor element.") def _click_publish(self): """Click the publish button (found by text content).""" print("[cdp_publish] Clicking publish button...") time.sleep(ACTION_INTERVAL) btn_text = SELECTORS["publish_button_text"] clicked = self._evaluate(f""" (function() {{ // Strategy 1: search