Python使用语雀API操作

资料

burpheart/yuque-crawl：https://github.com/burpheart/yuque-crawl
1. 限制：这个代码只能下载公开的仓库的 md 文件
2. 特点：可以下载别人的公开知识库，可以识别到分组
yuque-helper/yuque2book：已测不能运行
atian25/yuque-exporter：已测不能运行
瓦雀：https://karobben.github.io/2021/03/02/Python/yuqueAPI/
语雀官方 API：https://www.yuque.com/yuque/developer

教程

获取语雀 Token

网页端登录语雀—点击头像—账户设置—Token

说明

语雀所有的开放 API 都需要 Token 验证之后才能访问
你需要在请求的 HTTP Headers 传入 X-Auth-Token 带入您的身份 Token 信息，用于完成认证

获取用户信息

import requests

# Yuque login name; the repo-list snippet further down builds its URL from it.
USER = "xdd1997"
# /api/v2/user returns the profile of the account that owns the token; this is
# the same endpoint the complete scripts later in this article call.
url_user = 'https://www.yuque.com/api/v2/user'
# Every open API request authenticates through the X-Auth-Token header.
header = {"X-Auth-Token": "your Token"}
resu = requests.get(url_user, headers=header).json()
resu

获取用户/团队名下仓库列表

# Endpoint that lists the repos under the login stored in USER.
url_repo = f"https://www.yuque.com/api/v2/users/{USER}/repos"
repo_response = requests.get(url_repo, headers=header)
Repo_Result = repo_response.json()['data']
Repo_Result

# Collect the id of every repo; the id uniquely identifies a repo.
repo_ids = [item["id"] for item in Repo_Result]

repo_ids

[4240****, 2120****, 1087****]

获得一个仓库下的文档列表

# Id of the repo whose documents should be listed (masked here).
repo_id = '4240****'
url_docs = f"https://www.yuque.com/api/v2/repos/{repo_id}/docs"
docs_response = requests.get(url_docs, headers=header)
Doc_Result = docs_response.json()['data']
Doc_Result

获得一个仓库下所有文档的 slug

# A slug uniquely identifies a document.
slugs = [item['slug'] for item in Doc_Result]
slugs

获取单篇文档信息

# Slug of the document to fetch (masked here).
slug = "gbhna********"
url = "https://www.yuque.com/api/v2/repos/" + repo_id + "/docs/" + slug
doc_response = requests.get(url, headers=header)
# The parsed JSON is kept under this name because the next step reads
# Repo_Result["data"]["body"].
Repo_Result = doc_response.json()
Repo_Result

获取某一篇文档内容

resu = Repo_Result["data"]["body"]
resu

'---
title: Python导出语雀文档
categories: [Python]
tags: [Python，语雀]
date: 2023-10-24
updated: 2023-10-24
cover:  https://mypic2016.oss-cn-beijing.aliyuncs.com/picGo/202310241331546.png
---\n\n\n\n## 方法1\n\n1. 代码来源：[https://github.com/burpheart/yuque-crawl](https://github.com/burpheart/yuque-crawl)\n2. 限制：这个代码只能下载公开的仓库的md文件\n3. 根据自己需要，稍稍修改了下以便能下载自己指定的一些仓库，得到下面代码：\n```python\n# BY @burpheart\n# https://www.yuque.com/burpheart/phpaudit\n# https://github.com/burpheart\nimport sys\n\nimport requests\nimport json\nimport re\nimport os\nimport urllib.parse\n\ntset = []\n\n\ndef save_page(book_id, sulg, path):\n    docsdata = requests.get(\n        \'https://www.yuque.com/api/docs/\' + sulg + \'?book_id=\' + book_id + \'&merge_dynamic_data=false&mode=markdown\')\n    if (docsdata.status_code != 200):\n        print("文档下载失败 页面可能被删除 ", book_id, sulg,path, docsdata.content)\n        return\n    docsjson = json.loads(docsdata.content)\n\n    f = open(path, \'w\', encoding=\'utf-8\')\n    f.write(docsjson[\'data\'][\'sourcecode\'])\n    f.close()\n\n\ndef get_book(url, save_path):\n    docsdata = requests.get(url)\n    data = re.findall(r"decodeURIComponent\\(\\"(.+)\\"\\)\\);", docsdata.content.decode(\'utf-8\'))\n    docsjson = json.loads(urllib.parse.unquote(data[0]))\n    test = []\n    list = {}\n    temp = {}\n    md = ""\n    table = str.maketrans(\'\\/:*?"<>|\' + "\\n\\r", "___________")\n    prename = ""\n    if (os.path.exists(save_path + "/" + str(docsjson[\'book\'][\'id\'])) == False):\n        os.makedirs(save_path + "/" + str(docsjson[\'book\'][\'id\']))\n\n    for doc in docsjson[\'book\'][\'toc\']:\n        if (doc[\'type\'] == \'TITLE\' or doc[\'child_uuid\']!= \'\'):\n            filename = \'\'\n            list[doc[\'uuid\']] = {\'0\': doc[\'title\'], \'1\': doc[\'parent_uuid\']}\n            uuid = doc[\'uuid\']\n            temp[doc[\'uuid\']] = \'\'\n            while True:\n                if (list[uuid][\'1\'] != \'\'):\n                    if temp[doc[\'uuid\']] == \'\':\n                        temp[doc[\'uuid\']] = doc[\'title\'].translate(table)\n                    else:\n                        temp[doc[\'uuid\']] = 
list[uuid][\'0\'].translate(table) + \'/\' + temp[doc[\'uuid\']]\n                    uuid = list[uuid][\'1\']\n                else:\n                    temp[doc[\'uuid\']] = list[uuid][\'0\'].translate(table) + \'/\' + temp[doc[\'uuid\']]\n                    break\n            if ((os.path.exists(save_path + "/" + str(docsjson[\'book\'][\'id\']) + \'/\' + temp[doc[\'uuid\']])) == False):\n                os.makedirs(save_path + "/" + str(docsjson[\'book\'][\'id\']) + \'/\' + temp[doc[\'uuid\']])\n            if (temp[doc[\'uuid\']].endswith("/")):\n                md += "## " + temp[doc[\'uuid\']][:-1] + "\\n"\n            else:\n                md += "  " * (temp[doc[\'uuid\']].count("/") - 1) + "* " + temp[doc[\'uuid\']][\n                                                                         temp[doc[\'uuid\']].rfind("/") + 1:] + "\\n"\n        if (doc[\'url\'] != \'\'):\n            if doc[\'parent_uuid\'] != "":\n                if (temp[doc[\'parent_uuid\']].endswith("/")):\n                    md += " " * temp[doc[\'parent_uuid\']].count("/") + "* [" + doc[\'title\'] + "](" + urllib.parse.quote(\n                        temp[doc[\'parent_uuid\']] + "/" + doc[\'title\'].translate(table) + \'.md\') + ")" + "\\n"\n                else:\n                    md += "  " * temp[doc[\'parent_uuid\']].count("/") + "* [" + doc[\'title\'] + "](" + urllib.parse.quote(\n                        temp[doc[\'parent_uuid\']] + "/" + doc[\'title\'].translate(table) + \'.md\') + ")" + "\\n"\n\n                save_page(str(docsjson[\'book\'][\'id\']), doc[\'url\'],\n                          save_path + "/" + str(docsjson[\'book\'][\'id\']) + \'/\' + temp[doc[\'parent_uuid\']] + "/" + doc[\n                              \'title\'].translate(table) + \'.md\')\n            else:\n                md += " " + "* [" + doc[\'title\'] + "](" + urllib.parse.quote(\n                    doc[\'title\'].translate(table) + \'.md\') + ")" + "\\n"\n                
save_page(str(docsjson[\'book\'][\'id\']), doc[\'url\'],\n                          save_path + "/" + str(docsjson[\'book\'][\'id\']) + "/" + doc[\n                              \'title\'].translate(table) + \'.md\')\n    f = open(save_path + "/" + str(docsjson[\'book\'][\'id\']) + \'/\' + "/SUMMARY.md", \'w\', encoding=\'utf-8\')\n    f.write(md)\n    f.close()\n\n\nif __name__ == \'__main__\':\n    repos ={"CAD_CAE":"cadcae",\n            "编程语言":"program",\n            "博客文章-公开": "blog"}\n\n    for key, value in repos.items():\n        url = f"https://www.yuque.com/xdd1997/{value}"\n        save_path = f"xdd1997/{key}"\n        get_book(url, save_path)\n        print(f"{key}下载完成")\n        \n```\n\n\n## 方法二\n希望能找到一种可以下载private仓库的方法
已测试不能运行的库\n\n- [yuque-helper/yuque2book](https://github.com/yuque-helper/yuque2book)\n- [atian25/yuque-exporter](https://github.com/atian25/yuque-exporter)\n\n\n\n## 方法三\n参考： [https://karobben.github.io/2021/03/02/Python/yuqueAPI/](https://karobben.github.io/2021/03/02/Python/yuqueAPI/)\n\n'

保存文档内容为 md 文件

with open(fil_path, "w", encoding="utf-8") as fw:
    fw.write(resu)

案例

下载语雀所有 Book 的所有文章

TODO: 分组功能需要学习获取知识库目录

import os
import shutil
import re
import requests


# =========================
def deal_yuque_md(content):
    """Clean up the Markdown body of a single Yuque document.

    Two clean-ups are applied:

    1. Empty HTML anchors of the form ``<a name="..."></a>`` are removed.
    2. The ``#...`` fragment at the end of an image URL is dropped, so
       ``![alt](https://host/pic.png#key=value)`` becomes
       ``![alt](https://host/pic.png)``.

    Args:
        content: Markdown text of the document body.

    Returns:
        The cleaned Markdown text. Text that contains neither anchors nor
        image fragments is returned unchanged.
    """
    # Remove empty anchors such as <a name="Fe8z5"></a>.
    # NOTE(review): the pattern in the original code was an empty string, so
    # this step did nothing; the anchor pattern is reconstructed from the
    # "remove strings such as ..." comment -- confirm it matches the intent.
    content = re.sub(r'<a name="[^"\n]*"></a>', "", content)

    # Drop the "#..." fragment from image URLs. Every part of the pattern is
    # bounded ([^\]], [^)#], [^)]) so a match can never run past the closing
    # parenthesis of its own image; text or further images on the same line
    # are left alone.
    content = re.sub(
        r'(!\[[^\]\n]*\]\([^)#\n]*)#[^)\n]*\)',
        r'\1)',
        content,
    )

    return content


# =============================
def login_get_doc(yuque_token):
    """Collect the API URL of every document in the token owner's Books.

    Args:
        yuque_token: token obtained from yuque.com.

    Returns:
        dict with three keys: "header" (the auth header sent with every
        request), "Repo_Result" (the repo list as returned by the API) and
        "article_url_list" (one document-detail URL per document found in
        repos whose type is "Book").
    """
    api_root = 'https://www.yuque.com/api/v2'
    header = {"X-Auth-Token": yuque_token}

    # Resolve the login name of the account the token belongs to.
    profile = requests.get(api_root + '/user', headers=header).json()
    user_name = profile["data"]["login"]

    # Fetch the repos listed under that login.
    repos_url = api_root + '/users/' + user_name + "/repos"
    Repo_Result = requests.get(repos_url, headers=header).json()['data']

    # Walk the Book-type repos and build one detail URL per document.
    article_url_list = []
    for repo in Repo_Result:
        if repo['type'] != "Book":
            continue
        repo_id = repo['id']
        docs_url = api_root + '/repos/' + str(repo_id) + '/docs'
        docs = requests.get(docs_url, headers=header).json()['data']
        article_url_list.extend(
            f"{api_root}/repos/{repo_id}/docs/{doc['slug']}" for doc in docs
        )

    return {
        "header": header,
        "Repo_Result": Repo_Result,
        "article_url_list": article_url_list,
    }

# ============================================
def download_all_doc(info, doc_download_path):
    """Recreate the download folder and save every listed document as Markdown.

    One sub-folder is created per Book-type repo, and each document is written
    to ``<doc_download_path>/<repo name>/<title>.md`` after its body has been
    passed through deal_yuque_md. Documents that end up with the same
    sanitized title inside one repo overwrite each other.

    Args:
        info: dict with the keys "header", "Repo_Result" and
            "article_url_list" (the shape returned by login_get_doc).
        doc_download_path: root output folder. If it already exists it is
            deleted, including its contents, before downloading starts.
    """
    header = info["header"]
    Repo_Result = info["Repo_Result"]
    article_url_list = info["article_url_list"]

    # Map the characters \ / : * ? " < > | and line breaks to "_" so titles
    # and repo names can be used as file names. The backslash is written as
    # "\\" because "\/" is an invalid escape sequence in a normal literal.
    table = str.maketrans('\\/:*?"<>|' + "\n\r", "___________")

    # Start from a clean output folder.
    if os.path.exists(doc_download_path):
        shutil.rmtree(doc_download_path)

    # One sub-folder per Book-type repo.
    for item in Repo_Result:
        if item['type'] == "Book":
            repo_name = item['name'].translate(table)
            os.makedirs(os.path.join(doc_download_path, repo_name), exist_ok=True)

    # The progress total is the number of URLs that will be fetched; there is
    # no need to download every repo's document list a second time.
    count_sum = len(article_url_list)

    for count, url in enumerate(article_url_list, start=1):
        doc_data = requests.get(url, headers=header).json()["data"]
        article_title = doc_data["title"].translate(table)
        repo_name = doc_data["book"]["name"].translate(table)
        print(f"正在下载文章：{count}/{count_sum}:{repo_name}/{article_title} ")

        # Process the body before opening the file so a processing error does
        # not leave an empty file behind.
        # NOTE(review): assumes a document may come back without a body or
        # with body=None; such a document is written as an empty file.
        resu = deal_yuque_md(doc_data.get("body") or "")

        # The folder normally exists already; creating it here covers the
        # case where the book name in the document differs from the repo list.
        repo_dir = os.path.join(doc_download_path, repo_name)
        os.makedirs(repo_dir, exist_ok=True)
        fil_path = os.path.join(repo_dir, article_title + ".md")
        with open(fil_path, "w", encoding="utf-8") as fw:
            fw.write(resu)

# ========================
if __name__ == "__main__":
    # Folder that receives the downloaded articles.
    doc_download_path = r"语雀文章下载位置"
    # Yuque token placeholder; replace it with a real token before running.
    yuque_token = "***"

    # First gather every document URL, then download them all.
    info = login_get_doc(yuque_token)
    download_all_doc(info=info, doc_download_path=doc_download_path)

https://github.com/xie-dd/yuque-book-download.git

修改一篇文章

官方文档

我们的代码

# -*- coding: utf-8 -*-
# @File   : test.py
# @Author : xdd2026@qq.com
# @Date   : 2022/11/05 18:54
# @Purpose:
import json
import requests


if __name__ == "__main__":
    # --- Look up the account that owns the token ---
    yuque_token = ""
    header = {"X-Auth-Token": yuque_token}
    url = 'https://www.yuque.com/api/v2/user'
    resu = requests.get(url, headers=header).json()
    user_name = resu["data"]["login"]

    # --- List that account's repos and pretty-print them ---
    url = f"https://www.yuque.com/api/v2/users/{user_name}/repos"
    repo = requests.get(url, headers=header).json()['data']
    repo_str = json.dumps(repo, indent=4)
    print(repo_str)

    # --- Update one document ---
    book_id = "48856153"
    doc_slug = "144301227"
    my_title = "xdd3"
    my_body = "xdd-xdd-xdd"
    url = "https://www.yuque.com/api/v2/repos/" + book_id + "/docs/" + doc_slug

    data = {
        "slug": doc_slug,
        "title": my_title,
        "public": 1,           # 0: private, 1: public
        "format": "markdown",  # one of "markdown", "html", "lake"
        "body": my_body
    }

    # Passing a dict via data= sends the fields form-encoded.
    responses = requests.put(url, data=data, headers=header)
    updated = responses.status_code == 200
    if updated:
        print("文章更新完成")

创建一篇文章

官方文档

我们的代码

import json
import requests


def check_is_exist(book_id, my_slug):
    """Report whether a document with the given slug exists in a repo.

    The request is sent with the module-level ``header`` dict, so that name
    must be defined before this function is called.

    Args:
        book_id: id of the repo to look in.
        my_slug: slug of the document to look for.

    Returns:
        1 when the API answers 200, 0 when it answers 404.

    Raises:
        ValueError: for any other HTTP status code.
    """
    doc_url = f"https://www.yuque.com/api/v2/repos/{book_id}/docs/{my_slug}"
    status = requests.get(doc_url, headers=header).status_code
    if status == 404:
        return 0
    if status == 200:
        return 1
    raise ValueError("出现错误")


if __name__ == "__main__":
    # --- Look up the account that owns the token ---
    yuque_token = ""
    url = 'https://www.yuque.com/api/v2/user'
    header = {"X-Auth-Token": yuque_token}
    resu = requests.get(url, headers=header).json()
    user_name = resu["data"]["login"]

    # --- List that account's repos and pretty-print them ---
    url = 'https://www.yuque.com/api/v2/users/' + user_name + "/repos"
    repo = requests.get(url, headers=header).json()['data']
    repo_str = json.dumps(repo, indent=4)
    print(repo_str)

    # --- Create the document ---
    book_id = '48856153'
    url = f"https://www.yuque.com/api/v2/repos/{book_id}/docs"

    # Check first whether the slug is already taken.
    my_slug = "144301227"
    my_title = "Python提交的文档27"
    my_body = "XDD"
    isexist = check_is_exist(book_id, my_slug)

    # Tracks whether a document with this slug is known to exist, so the
    # table-of-contents step is skipped after a failed creation instead of
    # crashing on the missing 'data' key.
    doc_available = True
    if isexist == 1:
        print(" my_slug 已经存在")
    else:
        data = {
            "slug": my_slug,
            "title": my_title,
            "public": 1,           # 0: private, 1: public
            "format": "markdown",  # one of "markdown", "html", "lake"
            "body": my_body
        }

        responses = requests.post(url, data=data, headers=header)
        if responses.status_code == 200:
            print("文章创建完成")
        else:
            doc_available = False
            print(f"文章创建失败: {responses.status_code} {responses.text}")

    # --- Add the document to the repo's table of contents ---
    if doc_available:
        # Look up the numeric id of the document.
        url = f"https://www.yuque.com/api/v2/repos/{book_id}/docs/{my_slug}"
        responses = requests.get(url, headers=header)
        doc_id = responses.json()['data']['id']

        # The table of contents is updated through the repo's /toc endpoint;
        # a PUT on /repos/{book_id} itself addresses the repo's own settings.
        url = f"https://www.yuque.com/api/v2/repos/{book_id}/toc"
        data = {
            "action": "appendNode",  # appendNode: insert last, prependNode: insert first
            "action_mode": "child",  # sibling: same level, child: one level below
            "type": "DOC",           # DOC: document, LINK: external link, TITLE: group
            "doc_ids": doc_id,       # documented as an array of integer ids; a single id is sent here
        }
        responses = requests.put(url, data=data, headers=header)
        if responses.status_code == 200:
            print("文章添加到目录完成")
        else:
            print(f"文章添加到目录失败: {responses.status_code} {responses.text}")