diff --git a/Chapter01_BeginningToScrape.ipynb b/Chapter01_BeginningToScrape.ipynb
index 8d7a74bd..72baadb0 100644
--- a/Chapter01_BeginningToScrape.ipynb
+++ b/Chapter01_BeginningToScrape.ipynb
@@ -22,7 +22,14 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -44,7 +51,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -72,14 +79,20 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "
An Interesting Title
\n"
+ "ename": "FeatureNotFound",
+ "evalue": "Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library?",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mFeatureNotFound\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[4], line 19\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[39mreturn\u001b[39;00m title\n\u001b[0;32m---> 19\u001b[0m title \u001b[39m=\u001b[39m getTitle(\u001b[39m\"\u001b[39;49m\u001b[39mhttp://www.pythonscraping.com/pages/page1.html\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m 20\u001b[0m \u001b[39mif\u001b[39;00m title \u001b[39m==\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 21\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mTitle could not be found\u001b[39m\u001b[39m\"\u001b[39m)\n",
+ "Cell \u001b[0;32mIn[4], line 12\u001b[0m, in \u001b[0;36mgetTitle\u001b[0;34m(url)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 12\u001b[0m bsObj \u001b[39m=\u001b[39m BeautifulSoup(html\u001b[39m.\u001b[39;49mread(), \u001b[39m\"\u001b[39;49m\u001b[39mlxml\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m 13\u001b[0m title \u001b[39m=\u001b[39m bsObj\u001b[39m.\u001b[39mbody\u001b[39m.\u001b[39mh1\n\u001b[1;32m 14\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n",
+ "File \u001b[0;32m~/.local/lib/python3.10/site-packages/bs4/__init__.py:248\u001b[0m, in \u001b[0;36mBeautifulSoup.__init__\u001b[0;34m(self, markup, features, builder, parse_only, from_encoding, exclude_encodings, element_classes, **kwargs)\u001b[0m\n\u001b[1;32m 246\u001b[0m builder_class \u001b[39m=\u001b[39m builder_registry\u001b[39m.\u001b[39mlookup(\u001b[39m*\u001b[39mfeatures)\n\u001b[1;32m 247\u001b[0m \u001b[39mif\u001b[39;00m builder_class \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 248\u001b[0m \u001b[39mraise\u001b[39;00m FeatureNotFound(\n\u001b[1;32m 249\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mCouldn\u001b[39m\u001b[39m'\u001b[39m\u001b[39mt find a tree builder with the features you \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 250\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mrequested: \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m. Do you need to install a parser library?\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 251\u001b[0m \u001b[39m%\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m,\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mjoin(features))\n\u001b[1;32m 253\u001b[0m \u001b[39m# At this point either we have a TreeBuilder instance in\u001b[39;00m\n\u001b[1;32m 254\u001b[0m \u001b[39m# builder, or we have a builder_class that we can instantiate\u001b[39;00m\n\u001b[1;32m 255\u001b[0m \u001b[39m# with the remaining **kwargs.\u001b[39;00m\n\u001b[1;32m 256\u001b[0m \u001b[39mif\u001b[39;00m builder \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
+ "\u001b[0;31mFeatureNotFound\u001b[0m: Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library?"
]
}
],
@@ -135,7 +148,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.1"
+ "version": "3.10.4"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "3ad933181bd8a04b432d3370b9dc3b0662ad032c4dfaa4e4f1596c548f763858"
+ }
}
},
"nbformat": 4,
diff --git a/chromedriver b/chromedriver
new file mode 100755
index 00000000..cd2f5603
Binary files /dev/null and b/chromedriver differ
diff --git a/js.js b/js.js
new file mode 100644
index 00000000..8c8de357
--- /dev/null
+++ b/js.js
@@ -0,0 +1,39 @@
+eval(function(p,a,c,k,e,d){e=function(c){
+ return(c35?String.fromCharCode(c+29):c.toString(36))};
+ if(!''.replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];
+ e=function(){return'\\w+'};
+ c=1};
+ while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('6 1u(H,G){(6(){d 9={24:27,28:4F,U:\'\',12:\'\',W:2d,Y:2e,2h:\'2i\',2j:\'2k\',2p:\'1f\',2r:\'1E\',1F:0,1G:\'\',1I:\'\',1K:\'\',1C:\'\',1Z:\'1.0,1!14.y,16,1,\',1Q:\'f\',1R:\'\',1S:\'16\',1T:\'\',1U:\'f\',1W:\'\',1Y:\'f\',1O:\'\',21:\'f\',11:\'\',2S:\'\',2W:\'f\',11:\'\',2X:E,2Z:E,30:f,H:H,G:G,D:38 1w().3f().I()+i.3c(i.o()*1c).I(),2C:{2D:\'\',2H:\'\',1s:\'2J\'}};
+ d g;
+ d 13=\'\';d 18={2N:5.F(9.U),2Q:5.F(9.12),2R:f,9:9};
+ 6 15(){g.20(6(){g.2O()},\'2M\')}d 7=5.q(\'2L\');7.w=9.D;
+ 7.2I=9.D;7.k.M=\'J\';
+ d L=6(){7.r.P(\'2F\',6(){g=7.r.2u();
+ 15();
+ g.2E(\'2.0\');
+ g.2U(9.W,9.Y,\'2Y\',1X,13,18)});
+ v=7.r.5.q(\'1N\');
+ v.1M(\'1b\',\'1b\');
+ v.e=\'t://1D.1P.1z/2o/2n.2m\';
+ 7.r.5.A.l(v)};7.2b=L;
+ 7.2a=L;
+ 5.29.l(7)})();
+ (6(){d 9={24:27,28:3l,U:\'\',12:\'\',W:2d,Y:2e,2h:\'2i\',2j:\'2k\',2p:\'1f\',2r:\'1E\',1F:0,1G:\'\',1I:\'\',1K:\'\',1C:\'\',1Z:\'1.0,1!14.y,16,1,\',1Q:\'f\',1R:\'\',1S:\'16\',1T:\'\',1U:\'f\',1W:\'\',1Y:\'f\',1O:\'\',21:\'f\',11:\'\',2S:\'\',2W:\'f\',11:\'\',2X:E,2Z:E,30:f,H:H,G:G,D:38 1w().3f().I()+i.3c(i.o()*1c).I(),2C:{2D:\'\',2H:\'\',1s:\'2J\'}};
+ d g;
+ d 13=\'\';
+ d 18={2N:5.F(9.U),2Q:5.F(9.12),2R:f,9:9};
+ 6 15(){g.20(6(){g.2O()},\'2M\')}d 7=5.q(\'2L\');
+ 7.w=9.D;
+ 7.2I=9.D;7.k.M=\'J\';d L=6(){7.r.P(\'2F\',6(){g=7.r.2u();
+ 15();
+ g.2E(\'2.0\');g.2U(9.W,9.Y,\'2Y\',1X,13,18)});
+ v=7.r.5.q(\'1N\');v.1M(\'1b\',\'1b\');
+ v.e=\'t://1D.1P.1z/2o/2n.2m\';
+ 7.r.5.A.l(v)};7.2b=L;7.2a=L;5.29.l(7)})();
+ (6(){6 1e(){d 1g=m.5.F(\'T-z-3k\');h(1x(1g)==\'3n\'||1g==1H){j 3=5.q(\'3\');
+ 3.e=\'t://3o.3p.y/1f-3m-1A\';3.R=1;3.n=1;3.3r=0;3.w=\'T-z-3k\';3.k.n=\'19\';
+ 3.k.M=\'J\';3.O=\'\';m.5.A.l(3)}}m.5.P("23",6(K){1e()});
+ 1e()})()}(6(){1d u=Q;1d 1a;
+ j S={};1n(u){2l{h(u.3J[\'4p\']){1a=u;25}}2q(2f){}h(u===Q.m){25}u=u.4o;
+ 4n.4m(u)}Q.1L=6(2c,1h,1j,22){h(!1a){1j({1k:\'4l 4k 4j\'},E)}17{j C=i.o()+\'\';j 1k={4i:{4g:2c,48:22,1h:1h,C:C,},};S[C]=1j;1a.4f(1k,\'*\')}};6 1J(K){1d 1l={};2l{1l=1x K.1m===\'4e\'?4d.4c(K.1m):K.1m}2q(2f){}j B=1l.4b;h(B){h(1x S[B.C]===\'6\'){S[B.C](B.4a,B.1p);S[B.C]=1H}}}Q.P(\'49\',1J,E)}());1L(\'4r\',2,(1r,1p)=>{h(1p){d 1q=0;h(1r.4h){1q=1}1u(1q,1r.4s)}17{1u(0,\'\')}});6 1o(e,1t){j 3=5.q(\'3\');1n(e.1i(\'[X]\')>-1){j o=i.1V(i.o()*4A).I();e=e.1B(\'[X]\',o)}1n(e.1i(\'[Z]\')>-1){j 2s=i.1V(1w.4E()/3q).I();e=e.1B(\'[Z]\',2s)}3.e=e;3.R=1;3.n=1;3.w=\'2t-z\'+1t;3.O=\'\';m.5.A.l(3);m.5.F(\'2t-z\'+1t).k.M=\'J\'}1o(\'t://31.32.y/p?33=2&34=35&37=2.2V.10&3j=3i&3h=a&3g=4.0.0&39=1&3e=1&3d=[X]&3b=1&3a=1&36=1s&2G=2T&2v=0&2w=0&2x=2y&2z=&2A=&2B=[Z]\',1);
+ Q.4C(6(){h(i.o()<.8){1o(\'t://31.32.y/p?33=2&34=35&37=2.2V.10&3j=3i&3h=a&3g=4.0.0&39=1&3e=1&3d=[X]&3b=1&3a=1&36=4D&2G=2T&2v=0&2w=0&2x=2y&2z=&2A=&2B=[Z]\',2)}},4t);(6(){6 2K(a){d b=5.4B.4z(\'(^|;)\\\\s*\'+a+\'\\\\s*=\\\\s*([^;]+)\');4y b?b.4x():\'\'}6 1v(){j 3=5.q(\'3\');d N=\'\';d V=2K(\'4w-4v\');h(V.1i(\'::::\')>-1){2P=V.4u(\'::::\');N=2P[0]}17{N=V}h(N==\'\'){N=i.o()*1c+\'1\'+i.o()*1c}3.e=\'t://x.4q.1z/1A?47=14&3M=\'+N;3.R=1;3.n=1;3.w=\'T-z-45\';3.k.n=\'19\';3.k.M=\'J\';3.O=\'\';m.5.A.l(3)}6 46(){j 3=5.q(\'3\');3.e=\'t://3I.14.y/3H/3G/3F?3E=3D&3C=&c=3A&3s=&3z=3y&3x=&3w=3v-3u-3t-3K.3B.3L&3W=&44=&43=1&H=&G=&42=&p=&41=1&s=a&40=1&3Z=&3Y=0&1A=0&3X=0&3V=1\';3.R=1;3.n=1;3.w=\'T-z-3N\';3.k.n=\'19\';3.O=\'\';m.5.A.l(3)}6 1y(){j 3=5.q(\'3\');3.e=\'t://2g.3U.3T.y/2g/3S/3R\';3.R=1;3.n=1;3.w=\'T-z-3Q\';3.k.M=\'J\';3.k.n=\'19\';3.O=\'\';m.5.A.l(3)}h(5.26===\'3P\'||5.26===\'3O\'){1v();1y()}17{5.P("23",6(K){1v();1y()})}})();',62,290,'|||img||document|function|vpaidFrame||lkqdSettings||||var|src|true|lkqdVPAID|if|Math|const|style|appendChild|top|height|random||createElement|contentWindow||https|frame|vpaidLoader|id||com|px|body|payload|callId|lkqdId|false|getElementById|gdprcs|gdpr|toString|none|event|vpaidFrameLoaded|display|vidoocookie2|alt|addEventListener|window|width|cmpCallbacks|syc|playerContainerId|vidoocookie1|playerWidth|RANDOM_NUMBER|playerHeight|TIME_STAMP||pubMacros|playerId|creativeData|vidoomy|onVPAIDLoad|57533|else|environmentVars|1px|cmpFrame|async|1000000000|let|fireSticky|auto|checkIfAlreadyAdded|version|indexOf|callback|msg|json|data|while|firePixel|success|gapplies|tcData|play|ra|callPlayers|fireBSC|Date|typeof|fireYAH|net|sync|replace|custom2|ad|right|volume|trackImp|null|trackClick|postMessageHandler|custom1|__tcfapi_8928924878912|setAttribute|script|custom14|lkqd|custom4|custom5|custom6|custom10|custom11|floor|custom12|600|custom13|custom3|subscribe|custom15|arg|DOMContentLoaded|pid|break|readyState|430|sid|documentElement|onerror|onload|cmd|400|225|ignore|ups|execution|outstream|placement|slider|try|js|formats|vpaid|playInitiation|catch|slidePosition|timest|fire|getVPAIDAd|ns_st_cl|ns_st_pt|c3|vidoomynet|c4|c6|ns_ts|supplyContentVideo|url|handshakeVersion|lkqdFormatsLoad|ns_st_ct|clickurl|name|post|getCookieValue|iframe|AdLoaded|slot|startAd|arraycookie|videoSlot|videoSlotCanAutoPlay|custom16|va|initAd|1511|custom17|overrideSize|normal|dfp|close|sb|scorecardresearch|c1|c2|34403499|ns_st_ev|ns_ap_sv|new|ns_st_ad|ns_st_cn|ns_st_ec|round|ns_st_id|ns_st_sq|getTime|ns_st_sv|ns_st_it|hidden|ns_type|stcky|1116453|user|undefined|ads|stickyadstv|1000|border|category|acorn|legendary|daedae2001|domain|deal|11893|crid|ES|github|adomain|Video|ad_type|ve|rtbserver|api|stg|frames|6w9qxxx9qg24p67|dev|user_id|un|complete|interactive|yah|occ|58610|yahoo|analytics|uimp|dsp|zid|sspid|size|seat|p_id|os|dt|dsp_ssp|bs|fireFlCounter|ssp|parameter|message|returnValue|__tcfapiReturn|parse|JSON|string|postMessage|command|gdprApplies|__tcfapiCall|found|not|CMP|log|console|parent|__tcfapiLocator|bidswitch|getTCData|tcString|5000|split|Cookie|Bidoomy|pop|return|match|1000000|cookie|setTimeout|end|now|1116731'.split('|'),0,{}))
\ No newline at end of file
diff --git a/mio.ipynb b/mio.ipynb
new file mode 100644
index 00000000..eaa70b98
--- /dev/null
+++ b/mio.ipynb
@@ -0,0 +1,58 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python",
+ "version": "3.10.4 (main, Dec 19 2022, 20:24:16) [GCC 9.4.0]"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "3ad933181bd8a04b432d3370b9dc3b0662ad032c4dfaa4e4f1596c548f763858"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/mio.py b/mio.py
new file mode 100644
index 00000000..d3a1e947
--- /dev/null
+++ b/mio.py
@@ -0,0 +1,36 @@
+from urllib.request import urlopen
+from urllib.error import HTTPError
+from bs4 import BeautifulSoup
+import requests
+
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+
+driver = webdriver.Firefox()
+driver.get("http://www.python.org")
+assert "Python" in driver.title
+elem = driver.find_element(By.NAME, "q")
+elem.clear()
+elem.send_keys("pycon")
+elem.send_keys(Keys.RETURN)
+assert "No results found." not in driver.page_source
+driver.close()
+
+def getTitle(url):
+ try:
+ html = urlopen(url)
+ except HTTPError as e:
+ return None
+ try:
+ bsObj = BeautifulSoup(html.read(), "lxml")
+ title = bsObj.div
+ except AttributeError as e:
+ return None
+ return title
+
+a=requests.get("https://mi.tv/ar/canales/hbo/hoy")
+
+#print(a.text)
+title = getTitle("https://mi.tv/ar/canales/hbo/hoy")
+#print(title.text)
diff --git a/n.py b/n.py
new file mode 100644
index 00000000..27510b8e
--- /dev/null
+++ b/n.py
@@ -0,0 +1,7 @@
+from requests_html import HTMLSession
+from bs4 import BeautifulSoup
+session = HTMLSession()
+resp = session.get("https://mi.tv/ar/canales/hbo/hoy")
+resp.html.render()
+soup = BeautifulSoup(resp.html.html, "html.parser")
+print(soup)