{"id":1418,"date":"2022-11-15T00:04:51","date_gmt":"2022-11-14T16:04:51","guid":{"rendered":"http:\/\/zhang.mba\/?p=1418"},"modified":"2022-11-15T00:13:47","modified_gmt":"2022-11-14T16:13:47","slug":"python-jie-xihtml-huo-qu-jie-dian-de-nei-rong-pyth","status":"publish","type":"post","link":"https:\/\/zhang.mba\/index.php\/2022\/11\/15\/00\/04\/51\/1418\/python-jie-xihtml-huo-qu-jie-dian-de-nei-rong-pyth\/python\/zhangzhiqi\/","title":{"rendered":"python\u89e3\u6790html\u83b7\u53d6\u8282\u70b9\u7684\u5185\u5bb9"},"content":{"rendered":"<video controls><source src=''><\/video>\n<p>\u5728\u722c\u53d6\u7f51\u9875\u7684\u65f6\u5019\uff0c\u722c\u4e0b\u7684\u6570\u636e\u9700\u8981\u89e3\u6790html\u3002\u5982\u4e0b\u4ee3\u7801\u3002<\/p>\n<p>\u4f7f\u7528python3.x<\/p>\n<pre class=\"line-numbers\"><code class=\"language-python\">from bs4 import BeautifulSoup as bs\r\n\r\nhtml = '''&lt;html&gt;\r\n&lt;head&gt;\r\n    &lt;title class='ceshi'&gt;super \u54c8\u54c8  star&lt;\/title&gt;\r\n&lt;\/head&gt;\r\n&lt;body&gt;\r\n    \u5929\u4e0b\u7b2c\u4e00\u5e05\r\n    &lt;p class='sister'&gt;\r\n        \u662f\u4e0d\u662f\r\n    &lt;\/p&gt;\r\n    &lt;p id='seeyou'&gt;haha\u563b\u563b&lt;\/p&gt;\r\n&lt;\/body&gt;\r\n&lt;\/html&gt;'''\r\nstr = '''\u7528BeautifulSoup\u89e3\u6790\u6570\u636e  python3 \u5fc5\u987b\u4f20\u5165\u53c2\u6570\u4e8c'html.parser' \u5f97\u5230\u4e00\u4e2a\u5bf9\u8c61\uff0c\u63a5\u4e0b\u6765\u83b7\u53d6\u5bf9\u8c61\u7684\u76f8\u5173\u5c5e\u6027'''\r\nhtml = bs(html, 'html.parser')\r\n# \u8bfb\u53d6title\u5185\u5bb9\r\nprint(html.title)\r\nattrs = html.title.attrs\r\nprint(attrs)\r\nprint(attrs['class'][0])  # \u663e\u793aclass\u91cc\u9762\u7684\u5185\u5bb9\r\n\r\nprint(html.body)  # \u663e\u793abody\u5185\u5bb9\r\n\r\nprint(html.p.attrs)\r\nprint(html.select(\"#seeyou\")[0].string)  # \u89e3\u6790id\u662fseeyou\u7684\u6807\u7b7e\u91cc\u9762\u7684\u5185\u5bb9\r\n<\/code><\/pre>\n<p>\u6587\u7ae0\u6765\u6e90&#8212;-<a href=\"https:\/\/blog.csdn.net\/qq_36411874\">\u6e34\u671b\u98de\u7684\u9c7c<\/a><\/p>\n<!--CusAds0-->\n<div style=\"font-size: 0px; height: 0px; line-height: 0px; margin: 0; padding: 0; clear: both;\"><\/div>","protected":false},"excerpt":{"rendered":"<p>\u5728\u722c\u53d6\u7f51\u9875\u7684\u65f6\u5019\uff0c\u722c\u4e0b\u7684\u6570\u636e\u9700\u8981\u89e3\u6790html\u3002\u5982\u4e0b\u4ee3\u7801\u3002 \u4f7f\u7528python3.x from bs4 import BeautifulSoup as bs html = &#8221;'&lt;html&gt; &lt;head&gt; &lt;title class=&#8217;ceshi&#8217;&gt;super \u54c8\u54c8 star&lt;\/title&gt; &lt;\/head&gt; &lt;body&#038;g &#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_bbp_topic_count":0,"_bbp_reply_count":0,"_bbp_total_topic_count":0,"_bbp_total_reply_count":0,"_bbp_voice_count":0,"_bbp_anonymous_reply_count":0,"_bbp_topic_count_hidden":0,"_bbp_reply_count_hidden":0,"_bbp_forum_subforum_count":0},"categories":[12],"tags":[],"_links":{"self":[{"href":"https:\/\/zhang.mba\/index.php\/wp-json\/wp\/v2\/posts\/1418"}],"collection":[{"href":"https:\/\/zhang.mba\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/zhang.mba\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/zhang.mba\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/zhang.mba\/index.php\/wp-json\/wp\/v2\/comments?post=1418"}],"version-history":[{"count":0,"href":"https:\/\/zhang.mba\/index.php\/wp-json\/wp\/v2\/posts\/1418\/revisions"}],"wp:attachment":[{"href":"https:\/\/zhang.mba\/index.php\/wp-json\/wp\/v2\/media?parent=1418"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/zhang.mba\/index.php\/wp-json\/wp\/v2\/categories?post=1418"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/zhang.mba\/index.php\/wp-json\/wp\/v2\/tags?post=1418"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}