
    [6g              	       "   d dl mZ d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
Z
d dlZd dlmZ d dlZd dlZd dlmZ d dlmc mZ d dlZd dlmZmZmZmZmZmZmZm Z m!Z! d dl"m#Z$ d d	l%m&Z&m'Z' d d
l(m)Z)  ej*        g d          d             Z+d Z,d Z-d Z.d Z/ ej*         ej0        d ej1        d           ej1        d          g           ej0        d ej1        d                    g          d             Z2 G d d          Z3dS )    )Iterator)partial)BytesIOStringION)Path)URLError)is_platform_windows)	NA	DataFrame
MultiIndexSeries	Timestamp
date_rangeread_csv	read_htmlto_datetime)ArrowStringArrayStringArray)file_path_to_url)zchinese_utf-16.htmlzchinese_utf-32.htmlzchinese_utf-8.htmlzletz_latin1.html)paramsc                 (     |ddd| j                   S )z6Parametrized fixture for HTML encoding test filenames.iodatahtml_encoding)param)requestdatapaths     X/var/www/surfInsights/venv3-11/lib/python3.11/site-packages/pandas/tests/io/test_html.pyhtml_encoding_filer   (   s     8D&/7=AAA    c                 n   t          |           t          |          k    s*J dt          |            dt          |                       d}t          t          d | |                    }|s
J |            t          | |          D ],\  }}t	          j        ||g|R i | |j        r
J d            -d S )Nz*lists are not of equal size len(list1) == z, len(list2) == z$not all list elements are DataFramesc                 V    t          | t                    ot          |t                    S N
isinstancer   )xys     r   <lambda>z(assert_framelist_equal.<locals>.<lambda>>   s    Ay11NjI6N6N r    zframes are both empty)lenallmapziptmassert_frame_equalempty)list1list2argskwargsmsgboth_framesframe_iframe_js           r   assert_framelist_equalr8   5   s    u::U###	&U	& 	&U	& 	& $##
 1CNN	
 	
 K u-- : :
gw@@@@@@@=99"99999: :r    c           	      &   t          j        d          }t          j        d           |                     |dd           t          j        t          d          5  t           |ddd	d
          d           d d d            d S # 1 swxY w Y   d S )Nbs4html5lib__version__z4.2zPandas requires versionmatchr   r   html	spam.htmlflavor)pytestimportorskipsetattrraisesImportErrorr   )monkeypatchr   r:   s      r   test_bs4_version_failsrI   I   s    

e
$
$C

###]E222	{*C	D	D	D M M((4==eLLLLM M M M M M M M M M M M M M M M M Ms   BB
B
c                      d} d}d|z   dz   }t          j        t          |          5  t          t	          |           d|           d d d            d S # 1 swxY w Y   d S )Nz
google.comzinvalid flavorz\{z \} is not a valid set of flavorsr=   googler>   rB   )rC   rF   
ValueErrorr   r   )urlrB   r4   s      r   test_invalid_flavorrO   R   s    
CF
&.>
>C	z	-	-	- @ @(3--x????@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @s    AAAc                    t          j        d           t          j        d           t          j        d            | dddd          }t          |ddg	          }t          |ddg	          }t          ||           d S )
Nr:   lxmlr;   r   r   r?   valid_markup.htmlr   )	index_colrB   )rC   rD   r   r8   )r   filenamedfs_lxmldfs_bs4s       r   test_same_orderingrW   [   s    



###xff.ABBHQx@@@HAug>>>G8W-----r    r:   r;   )marksrQ   c                 8    t          t          | j                  S )NrA   )r   r   r   )r   s    r   flavor_read_htmlrZ   f   s     9W]3333r    c            	          e Zd Zd Zej        d             Zej        d             Zd Zd Z	ej
        j        ej
        j        d                         Zej
        j        ej
        j        d                         Zej
        j        d             Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$ej
        j        ej
        j        d                         Z%ej
        j        ej
        j        ej
        j        d                                     Z&ej
        j        d             Z'ej
        j        d              Z(ej
        j        d!             Z)ej
        j        d"             Z*ej
        j        d#             Z+ej
        j        d$             Z,ej
        j        d%             Z-ej
        j        d&             Z.ej
        j        d'             Z/d( Z0ej        d)             Z1ej
        j        ej
        j        d*                         Z2ej
        j        ej
        j        d+                         Z3d, Z4d- Z5d. Z6d/ Z7d0 Z8d1 Z9ej
        j        d2             Z:ej
        j        d3             Z;d4 Z<d5 Z=d6 Z>d7 Z?d8 Z@d9 ZAd: ZBd; ZCd< ZDd= ZEd> ZFd? ZGd@ ZHej
        I                    dAdBdCg          dD             ZJdE ZKdF ZLdG ZMdH ZNdI ZOdJ ZPdK ZQej
        j        dL             ZRdM ZSdN ZTej
        I                    dOdB eUdPg          dQfdC eUdRg           eUdPg          fg          dS             ZVej
        I                    dTdBdCg          dU             ZWej
        X                    dV          dW             ZYdX ZZdY Z[ej
        j        ej
        j        dZ                         Z\d[ Z]d\ Z^ej
        I                    dAg d]          d^             Z_d_ Z`d` Zada Zbdb ZcdQS )cTestReadHtmlc                     d}t          j        t          |          5   |d           d d d            d S # 1 swxY w Y   d S )NzPassing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.r=   a  <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
                <tbody>
                    <tr>
                        <td>3</td>
                        <td>4</td>
                    </tr>
                </tbody>
            </table>)r-   assert_produces_warningFutureWarning)selfrZ   r4   s      r   test_literal_html_deprecationz*TestReadHtml.test_literal_html_deprecationq   s    > 	 'SAAA 	 	  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   7;;c                      |dddd          S )Nr   r   r?   r@    r`   r   s     r   	spam_datazTestReadHtml.spam_data   s    xffk:::r    c                      |dddd          S )Nr   r   r?   banklist.htmlrc   rd   s     r   banklist_datazTestReadHtml.banklist_data   s    xffo>>>r    c                    t          t          j                            d                              d          t	          j        t          d          t                                                  dj	                  
                    t                    }|                                } |t          |          ddid	
          d	         }t          j        ||           d S )N   )      abcdtypecolumnsz{:.3f}class	dataframer   )attrsrS   )r   nprandomdefault_rngpdIndexlistobjectr+   formatastypefloatto_htmlr   r-   r.   )r`   rZ   dfoutress        r   test_to_html_compatz TestReadHtml.test_to_html_compat   s    	%%a((//77eF;;;  
 S!!&&-- 	 jjllSMM';!71
 
 

 	c2&&&&&r    c                   	
 t          t          dt          j        dgd          t          g dd          t          dt          j        dgd          t          g d	d          g d
g dg dg dd          }|dk    ret	          t          j        g dt          j                            }t	          t          j        ddt          gt          j                            }n|dk    r[t          j	        d          ddl
m	  	                    g d                    } 	                    g d                    }n\t          j	        d          t                              g d                    }t                              g d                    }|                    d          }t          j        d|          5   |t!          |          |          d         }d d d            n# 1 swxY w Y   t          t          dt          j        dgd          t          g dd          t          dt          j        dgd          t          g d	d          t          ddt          gd          t          g dd          ||d          
|dk    r,dd lddl
m	 t          	
fd
j        D                       
t'          j        |
           d S )N   rl   Int64rn   )r   rj   rl         ?      @Float64)r   g       @r   )TFN)TFTabc)r   r   N)r   r   r   defghpythonr   r   pyarrowr   )ArrowExtensionArrayFindexzmode.string_storagedtype_backendTbooleanc           	      ^    i | ])}|                      |         d                     *S )T)from_pandas)array).0colr   expectedpas     r   
<dictcomp>z3TestReadHtml.test_dtype_backend.<locals>.<dictcomp>   sL        ,,RXXhsmQUX-V-VWW  r    )r   r   ru   nanr   r   object_r
   rC   rD   pandas.arraysr   r   r   rx   option_contextr   r   rq   r-   r.   )r`   string_storager   rZ   r   string_arraystring_array_nar   resultr   r   r   s            @@@r   test_dtype_backendzTestReadHtml.test_dtype_backend   s   QN':::IIIW555S"&#.i@@@OOO9===(((((($__%%%	 	
 
 X%%&rxrz'R'R'RSSL)"(Cb>*T*T*TUUOOi''$Y//B999999..rxx/H/HIIL11"((;K;K;K2L2LMMOO$Y//B+BHH___,E,EFFL.rxx8H8H8H/I/IJJOjjuj%%4nEE 	U 	U%%hsmm=QQQRSTF	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U QN':::IIIW555S"&#.i@@@OOO9===T5"-Y???///yAAA!$	 	
 
 I%%    999999      '/   H 	fh/////s   !G>>HHc                    t          |d          5 }|                    |                                            ||j        d          } ||j        d          }d d d            n# 1 swxY w Y   t	          ||           d S )Nutf-8encodingcontentFirst Federal Bank of Floridar=   Metcalf Bankopenserve_contentreadrN   r8   )r`   
httpserverrh   rZ   r   df1df2s          r   test_banklist_urlzTestReadHtml.test_banklist_url   s     -'222 	a$$QVVXX$666""5  C #"$  C	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	sC(((((   AA++A/2A/c                    t          |d          5 }|                    |                                            ||j        d          } ||j        d          }d d d            n# 1 swxY w Y   t	          ||           d S )Nr   r   r   	.*Water.*r=   Unitr   )r`   r   re   rZ   r   r   r   s          r   test_spam_urlzTestReadHtml.test_spam_url   s     )g... 	A!$$QVVXX$666"":>EEEC"":>@@@C	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A 	A
 	sC(((((r   c                 f     ||dddi          } ||dddi          }t          ||           d S )Nz.*Florida.*idtabler>   rt   r   r8   )r`   rh   rZ   r   r   s        r   test_banklistzTestReadHtml.test_banklist   s`    tWo
 
 
 g
 
 
 	sC(((((r    c                      ||d          } ||d          }t          ||           |d         j        d         dk    sJ |d         j        d         dk    sJ d S )Nr   r=   r   r   r   r   
ProximatesNutrient)r8   ilocrq   r`   re   rZ   r   r   s        r   	test_spamzTestReadHtml.test_spam  sz    y<<<y777sC(((1v{4 L00001v~a J......r    c                 T     ||          }|D ]}t          |t                    sJ d S r#   r$   )r`   re   rZ   dfsr   s        r   test_spam_no_matchzTestReadHtml.test_spam_no_match  sD    y)) 	- 	-Bb),,,,,,	- 	-r    c                 \     ||ddi          }|D ]}t          |t                    sJ d S )Nr   r   )rt   r$   )r`   rh   rZ   r   r   s        r   test_banklist_no_matchz#TestReadHtml.test_banklist_no_match  sM    }T7ODDD 	- 	-Bb),,,,,,	- 	-r    c                 f     ||dd          d         }|j         d         dk    sJ |j        rJ d S )Nr   rj   r>   headerr   r   )rq   r/   )r`   re   rZ   r   s       r   test_spam_headerzTestReadHtml.test_spam_header  sI    i{1EEEaHz!},,,,8r    c                 ^     ||dd          } ||dd          }t          ||           d S Nr   r   r>   skiprowsr   r   r   s        r   test_skiprows_intzTestReadHtml.test_skiprows_int"  F    yaHHHyCCCsC(((((r    c                      ||dt          d                    } ||dt          d                    }t          ||           d S Nr   rj   r   r   )ranger8   r   s        r   test_skiprows_rangez TestReadHtml.test_skiprows_range(  R    yeAhhOOOyqJJJsC(((((r    c                 f     ||dddg          } ||dddg          }t          ||           d S Nr   r   rj   r   r   r   r   s        r   test_skiprows_listzTestReadHtml.test_skiprows_list.  N    yq!fMMMy!QHHHsC(((((r    c                 f     ||dddh          } ||dddh          }t          ||           d S r   r   r   s        r   test_skiprows_setzTestReadHtml.test_skiprows_set4  r   r    c                 ^     ||dd          } ||dd          }t          ||           d S r   r   r   s        r   test_skiprows_slicez TestReadHtml.test_skiprows_slice:  r   r    c                      ||dt          d                    } ||dt          d                    }t          ||           d S r   slicer8   r   s        r   test_skiprows_slice_shortz&TestReadHtml.test_skiprows_slice_short@  r   r    c           	           ||dt          dd                    } ||dt          ddd                    }t          ||           d S )	Nr   rj      r   r   rk   r   r   r   s        r   test_skiprows_slice_longz%TestReadHtml.test_skiprows_slice_longF  sX    yeAqkkRRRyq!RQQQsC(((((r    c                      ||dt          j        d                    } ||dt          j        d                    }t          ||           d S r   )ru   aranger8   r   s        r   test_skiprows_ndarrayz"TestReadHtml.test_skiprows_ndarrayL  sW    ybiPQllSSSy1NNNsC(((((r    c                     t          j        t          d          5   ||dd           d d d            d S # 1 swxY w Y   d S )Nz%is not a valid type for skipping rowsr=   r   asdfr   )rC   rF   	TypeError)r`   re   rZ   s      r   test_skiprows_invalidz"TestReadHtml.test_skiprows_invalidR  s    ]9-TVVV 	L 	LYkFKKKK	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	Ls   8<<c                 ^     ||dd          } ||dd          }t          ||           d S Nr   r   r>   rS   r   r   r   s        r   
test_indexzTestReadHtml.test_indexV  sF    yqIIIy!DDDsC(((((r    c                 b     ||ddd          } ||ddd          }t          ||           d S Nr   r   r   )r>   r   rS   r   r   r   s        r   test_header_and_index_no_typesz+TestReadHtml.test_header_and_index_no_types[  K    yAQRSSSyqANNNsC(((((r    c                 b     ||ddd          } ||ddd          }t          ||           d S r   r   r   s        r    test_header_and_index_with_typesz-TestReadHtml.test_header_and_index_with_types`  r   r    c                 ^     ||dd          } ||dd          }t          ||           d S r   r   r   s        r   test_infer_typeszTestReadHtml.test_infer_typese  sF    yqIIIy!DDDsC(((((r    c                    t          |d          5 }t          |                                          }d d d            n# 1 swxY w Y   t          |d          5 }t          |                                          }d d d            n# 1 swxY w Y    ||d          } ||d          }t          ||           d S NzUTF-8r   r   r=   r   )r   r   r   r8   )r`   re   rZ   r   data1data2r   r   s           r   test_string_iozTestReadHtml.test_string_iok  s:   )g... 	'!QVVXX&&E	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' )g... 	'!QVVXX&&E	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' uK888uF333sC(((((s#   "A  AA"B

BBc                    t          |d          5 }|                                }d d d            n# 1 swxY w Y    |t          |          d          } |t          |          d          }t          ||           d S r  )r   r   r   r8   )r`   re   rZ   r   r   r   r   s          r   test_stringzTestReadHtml.test_stringv  s    )g... 	!6688D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 x~~[AAAx~~V<<<sC(((((s   377c                     t          |d          5 } ||d          }d d d            n# 1 swxY w Y   t          |d          5 } ||d          }d d d            n# 1 swxY w Y   t          ||           d S r  )r   r8   )r`   re   rZ   r   r   r   s         r   test_file_likezTestReadHtml.test_file_like  s   )g... 	9!""1K888C	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 )g... 	4!""1F333C	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	sC(((((s   ,00A""A&)A&c                     |                     dd           t          j        t          d          5   |dd           d d d            d S # 1 swxY w Y   d S )Nz#urlopen error unknown url type: git  coder=   zgit://github.comr   )r   rC   rF   r   r`   r   rZ   s      r   test_bad_url_protocolz"TestReadHtml.test_bad_url_protocol  s     	  !FS QQQ]8+PQQQ 	D 	D/{CCCC	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	Ds   AAAc                     |                     dd           t          j        t          t          fd          5   ||j        d           d d d            d S # 1 swxY w Y   d S )NzName or service not knownr  r  zHTTP Error 404: NOT FOUNDr=   r   )r   rC   rF   r   rM   rN   r  s      r   test_invalid_urlzTestReadHtml.test_invalid_url  s     	  !<3 GGG]Hj19TUUU 	@ 	@Z^;????	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@s   AA!Ac                     |} |t          t          j                            |                    dddi          }t	          |t
                    sJ |D ]}t	          |t                    sJ d S )NFirstr   r   r   )r   ospathabspathr%   rz   r   r`   rh   rZ   rN   r   r   s         r   test_file_urlzTestReadHtml.test_file_url  s    RW__S1122'$PW
 
 
 #t$$$$$ 	- 	-Bb),,,,,,	- 	-r    c                     |}t          j        t          d          5   ||dddi           d d d            d S # 1 swxY w Y   d S )NzNo tables foundr=   r   r   	tasdfabler   rC   rF   rM   )r`   rh   rZ   rN   s       r   test_invalid_table_attrsz%TestReadHtml.test_invalid_table_attrs  s    ]:->??? 	 	:4BU   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   <A A c                 p     ||dddiddg          d         }t          |j        t                    sJ d S )NMetcalfr   r   r   r   )r>   rt   r   r%   rq   r   r`   rh   rZ   r   s       r   test_multiindex_headerz#TestReadHtml.test_multiindex_header  sR    4/1a&
 
 

 "*j1111111r    c                 p     ||dddiddg          d         }t          |j        t                    sJ d S )Nr"  r   r   r   r   )r>   rt   rS   )r%   r   r   r$  s       r   test_multiindex_indexz"TestReadHtml.test_multiindex_index  sS    4/aQRV
 
 

 "(J///////r    c                      ||dddiddgddg          d         }t          |j        t                    sJ t          |j        t                    sJ d S )Nr"  r   r   r   r   )r>   rt   r   rS   )r%   rq   r   r   r$  s       r   test_multiindex_header_indexz)TestReadHtml.test_multiindex_header_index  sv    /q6!f
 
 
  "*j11111"(J///////r    c                 r     ||dddiddgd          d         }t          |j        t                    sJ d S Nr"  r   r   r   r   )r>   rt   r   r   r#  r$  s       r   &test_multiindex_header_skiprows_tuplesz3TestReadHtml.test_multiindex_header_skiprows_tuples  Z    /q6
 
 
  "*j1111111r    c                 r     ||dddiddgd          d         }t          |j        t                    sJ d S r+  r#  r$  s       r   test_multiindex_header_skiprowsz,TestReadHtml.test_multiindex_header_skiprows  r-  r    c                      ||dddiddgddgd          d         }t          |j        t                    sJ t          |j        t                    sJ d S )Nr"  r   r   r   r   )r>   rt   r   rS   r   )r%   r   r   rq   r$  s       r   %test_multiindex_header_index_skiprowsz2TestReadHtml.test_multiindex_header_index_skiprows  sy    /q6!f
 
 
  "(J/////"*j1111111r    c                 ,   |} |t          t          j                            |                    t	          j        t	          j        d                    ddi          }t          |t                    sJ |D ]}t          |t                    sJ d S )NFloridar   r   r   )	r   r  r  r  recompiler%   rz   r   r  s         r   test_regex_idempotencyz#TestReadHtml.test_regex_idempotency  s    RW__S1122*RZ	2233/
 
 

 #t$$$$$ 	- 	-Bb),,,,,,	- 	-r    c                     d}t          j        t          |          5   ||dd           d d d            d S # 1 swxY w Y   d S )Nz\(you passed a negative value\)r=   Waterr   r   r  )r`   re   rZ   r4   s       r   test_negative_skiprowsz#TestReadHtml.test_negative_skiprows  s    0]:S111 	D 	DYgCCCC	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	Ds   :>>c                     dS )Naf  
          <table class="contentstable" align="center"><tr>
            <td width="50%">
            <p class="biglink"><a class="biglink" href="whatsnew/2.7.html">What's new in Python 2.7?</a><br/>
                <span class="linkdescr">or <a href="whatsnew/index.html">all "What's new" documents</a> since 2.0</span></p>
            <p class="biglink"><a class="biglink" href="tutorial/index.html">Tutorial</a><br/>
                <span class="linkdescr">start here</span></p>
            <p class="biglink"><a class="biglink" href="library/index.html">Library Reference</a><br/>
                <span class="linkdescr">keep this under your pillow</span></p>
            <p class="biglink"><a class="biglink" href="reference/index.html">Language Reference</a><br/>
                <span class="linkdescr">describes syntax and language elements</span></p>
            <p class="biglink"><a class="biglink" href="using/index.html">Python Setup and Usage</a><br/>
                <span class="linkdescr">how to use Python on different platforms</span></p>
            <p class="biglink"><a class="biglink" href="howto/index.html">Python HOWTOs</a><br/>
                <span class="linkdescr">in-depth documents on specific topics</span></p>
            </td><td width="50%">
            <p class="biglink"><a class="biglink" href="installing/index.html">Installing Python Modules</a><br/>
                <span class="linkdescr">installing from the Python Package Index &amp; other sources</span></p>
            <p class="biglink"><a class="biglink" href="distributing/index.html">Distributing Python Modules</a><br/>
                <span class="linkdescr">publishing modules for installation by others</span></p>
            <p class="biglink"><a class="biglink" href="extending/index.html">Extending and Embedding</a><br/>
                <span class="linkdescr">tutorial for C/C++ programmers</span></p>
            <p class="biglink"><a class="biglink" href="c-api/index.html">Python/C API</a><br/>
                <span class="linkdescr">reference for C/C++ programmers</span></p>
            <p class="biglink"><a class="biglink" href="faq/index.html">FAQs</a><br/>
                <span class="linkdescr">frequently asked questions (with answers!)</span></p>
            </td></tr>
        </table>

        <p><strong>Indices and tables:</strong></p>
        <table class="contentstable" align="center"><tr>
            <td width="50%">
            <p class="biglink"><a class="biglink" href="py-modindex.html">Python Global Module Index</a><br/>
                <span class="linkdescr">quick access to all modules</span></p>
            <p class="biglink"><a class="biglink" href="genindex.html">General Index</a><br/>
                <span class="linkdescr">all functions, classes, terms</span></p>
            <p class="biglink"><a class="biglink" href="glossary.html">Glossary</a><br/>
                <span class="linkdescr">the most important terms explained</span></p>
            </td><td width="50%">
            <p class="biglink"><a class="biglink" href="search.html">Search page</a><br/>
                <span class="linkdescr">search this documentation</span></p>
            <p class="biglink"><a class="biglink" href="contents.html">Complete Table of Contents</a><br/>
                <span class="linkdescr">lists all sections and subsections</span></p>
            </td></tr>
        </table>
        rc   r`   s    r   python_docszTestReadHtml.python_docs  s    - -r    c                     |                     |            ||j        d          }t          |          dk    sJ d S )Nr   Pythonr=   r   )r   rN   r)   )r`   r<  r   rZ   r   s        r   test_multiple_matchesz"TestReadHtml.test_multiple_matches(  sI     	   555z~X>>>3xx!||||||r    c                     |                     |            ||j        d          }d |D             }t          |          ddgk    sJ d S )Nr   r>  r=   c                 8    g | ]}|j         d          dd         S )r   r   rk   )r   )r   r   s     r   
<listcomp>z7TestReadHtml.test_python_docs_table.<locals>.<listcomp>4  s'    ///RbgdmAaC ///r    PythWhat)r   rN   sorted)r`   r<  r   rZ   r   zzs         r   test_python_docs_tablez#TestReadHtml.test_python_docs_table/  sg     	   555z~X>>>//3///bzzff-------r    c                 d    d} |t          |                    }t          |          dk    sJ dS )z@
        Make sure that read_html ignores empty tables.
        a  
            <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
            </table>
            <table>
                <tbody>
                </tbody>
            </table>
        r   N)r   r)   )r`   rZ   r?   r   s       r   test_empty_tableszTestReadHtml.test_empty_tables7  s>    ( "!(4..116{{ar    c                      |t          d                    d         }t          ddgddggddg	          }t          j        ||           d S )
Na  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </tbody>
            <tbody>
                <tr>
                    <td>3</td>
                    <td>4</td>
                </tr>
            </tbody>
        </table>r   r   rj   rl   rk   ABr   rq   r   r   r-   r.   r`   rZ   r   r   s       r   test_multiple_tbodyz TestReadHtml.test_multiple_tbodyR  sp     "! 
 
. /2 Aq6Aq6"2S#JGGG
fh/////r    c                      |t          d                    d         }t          ddidg          }t          j        ||           dS )zt
        Don't fail with bs4 when there is a header and only one column
        as described in issue #9178
        a3  <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>first</td>
                    </tr>
                </tbody>
            </table>r   Headerfirstr   r   NrN  rO  s       r   test_header_and_one_columnz'TestReadHtml.test_header_and_one_columnr  sf    
 "! 
 
 " 8W"5aSAAA
fh/////r    c                      |t          d                    d         }t          g dgg d          }t          j        ||           dS )zK
        Ensure parser adds <tr> within <thead> on malformed HTML.
        a  <table>
            <thead>
                <tr>
                    <th>Country</th>
                    <th>Municipality</th>
                    <th>Year</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Ukraine</td>
                    <th>Odessa</th>
                    <td>1944</td>
                </tr>
            </tbody>
        </table>r   )UkraineOdessa  )CountryMunicipalityYearrM  NrN  rO  s       r   test_thead_without_trz"TestReadHtml.test_thead_without_tr  sx     "! 
 
& '* ---.777
 
 

 	fh/////r    c                    d}t          ddggddg          }t          ddgddggddg          }|                    d	
          }|                    d
          } |t          |                    d         } |t          |                    d         }t          j        ||           t          j        ||           dS )zh
        Make sure that read_html reads tfoot, containing td or th.
        Ignores empty tfoot
        a  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>bodyA</td>
                    <td>bodyB</td>
                </tr>
            </tbody>
            <tfoot>
                {footer}
            </tfoot>
        </table>bodyAbodyBrK  rL  rM  footAfootB )footerz%<tr><td>footA</td><th>footB</th></tr>r   N)r   r|   r   r-   r.   )	r`   rZ   data_template	expected1	expected2r  r  result1result2s	            r   test_tfoot_readzTestReadHtml.test_tfoot_read  s    
$ Wg$6#7#sLLL	G$w&89C:
 
 
	 $$B$//$$,S$TT""8E??33A6""8E??33A6
gy111
gy11111r    c                      |t          d          d          d         }t          ddggd          }t          j        ||           d S )Na
  
            <table>
                <tr>
                    <td>S</td>
                    <td>I</td>
                </tr>
                <tr>
                    <td>text</td>
                    <td>1944</td>
                </tr>
            </table>
        r   r   textrY  )SIrp   rN  rO  s       r   &test_parse_header_of_non_string_columnz3TestReadHtml.test_parse_header_of_non_string_column  sp     "!  
 
 
  !$ vtn-zBBB
fh/////r    c                    ddl m fd} ||dddi          d         }t           |dd	d
d          t          t          d          }|j        |j        k    sJ g d}g d}|                    |                              ||          }	|                    |          }
|	}ddg}||                             t                    ||<   t          j
        ||
           d S )Nr   )_remove_whitespacec                 @    	  |           S # t           $ r | cY S w xY wr#   )AttributeError)r&   rr  s    r   try_remove_wsz8TestReadHtml.test_banklist_header.<locals>.try_remove_ws  s=    ))!,,,!   s   
 r"  r   r   r   r   r   csvzbanklist.csv)Updated DateClosing Date
converters)
z,First Vietnamese American Bank In Vietnamesez"Westernbank Puerto Rico En Espanolz*R-G Premier Bank of Puerto Rico En EspanolzEurobank En EspanolzSanderson State Bank En EspanolzLWashington Mutual Bank (Including its subsidiary Washington Mutual Bank FSB)zSilver State Bank En Espanolz%AmTrade International Bank En EspanolzHamilton Bank, NA En Espanolz6The Citizens Savings Bank Pioneer Community Bank, Inc.)
zFirst Vietnamese American BankzWesternbank Puerto RicozR-G Premier Bank of Puerto RicoEurobankzSanderson State BankzWashington Mutual BankzSilver State BankzAmTrade International BankzHamilton Bank, NAzThe Citizens Savings Bankrx  rw  )pandas.io.htmlrr  r   r   shaper+   replaceapplyr   r-   r.   )r`   rh   r   rZ   ru  r   ground_trutholdnewdfnewgtnew	converted	date_colsrr  s                @r   test_banklist_headerz!TestReadHtml.test_banklist_header  s8   555555	 	 	 	 	 m9T7OTTTUVWHT65.99(19MM
 
 
 x<-----
 
 

 
 
 }%%--c377  //	#^4	(399+FF	)
i/////r    c                     d}t          |d          5 }|                                }d d d            n# 1 swxY w Y   ||v sJ  ||dddi          d         }||                                v sJ d S )NzGold Canyonr   r   r   r   r   r   )r   r   	to_string)r`   rh   rZ   gcr   raw_textr   s          r   test_gold_canyonzTestReadHtml.test_gold_canyon  s    -'222 	 avvxxH	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  X~~~~tWo
 
 

 R\\^^######s   599c                      |t          d          d          d         } |t          d          d          d         }t          j        ||           d S )Na  <table>
                        <thead>
                            <tr style="text-align: right;">
                            <th></th>
                            <th>C_l0_g0</th>
                            <th>C_l0_g1</th>
                            <th>C_l0_g2</th>
                            <th>C_l0_g3</th>
                            <th>C_l0_g4</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <th>R_l0_g0</th>
                            <td> 0.763</td>
                            <td> 0.233</td>
                            <td> nan</td>
                            <td> nan</td>
                            <td> nan</td>
                            </tr>
                            <tr>
                            <th>R_l0_g1</th>
                            <td> 0.244</td>
                            <td> 0.285</td>
                            <td> 0.392</td>
                            <td> 0.137</td>
                            <td> 0.222</td>
                            </tr>
                        </tbody>
                    </table>r   rS   a  <table>
                    <thead>
                        <tr style="text-align: right;">
                        <th></th>
                        <th>C_l0_g0</th>
                        <th>C_l0_g1</th>
                        <th>C_l0_g2</th>
                        <th>C_l0_g3</th>
                        <th>C_l0_g4</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                        <th>R_l0_g0</th>
                        <td> 0.763</td>
                        <td> 0.233</td>
                        </tr>
                        <tr>
                        <th>R_l0_g1</th>
                        <td> 0.244</td>
                        <td> 0.285</td>
                        <td> 0.392</td>
                        <td> 0.137</td>
                        <td> 0.222</td>
                        </tr>
                    </tbody>
                 </table>)r   r-   r.   )r`   rZ   r   r   s       r   test_different_number_of_colsz*TestReadHtml.test_different_number_of_cols'  s    ##  @ C"
 "
 "
D E"H "! : =
 
 
> ?B 	fh/////r    c                      |t          d                    d         }t          g dgg d          }t          j        ||           d S )NaZ  
            <table>
                <tr>
                    <th>A</th>
                    <th colspan="1">B</th>
                    <th rowspan="1">C</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                    <td>c</td>
                </tr>
            </table>
        r   r   )rK  rL  Crp   rN  rO  s       r   test_colspan_rowspan_1z#TestReadHtml.test_colspan_rowspan_1o  sh    !! 
 
" #& ooo.HHH
fh/////r    c                      |t          d          d          d         }t          g dgg d          }t          j        ||           d S )Na  
            <table>
                <tr>
                    <td colspan="2">X</td>
                    <td>Y</td>
                    <td rowspan="2">Z</td>
                    <td>W</td>
                </tr>
                <tr>
                    <td>A</td>
                    <td colspan="2">B</td>
                    <td>C</td>
                </tr>
            </table>
        r   rl  )rK  rL  rL  Zr  )XzX.1Yr  WrM  rN  rO  s       r    test_colspan_rowspan_copy_valuesz-TestReadHtml.test_colspan_rowspan_copy_values  s     "! " %
 
 
& '* +++,6Q6Q6Q
 
 
 	fh/////r    c                      |t          d          d          d         }t          g dgg d          }t          j        ||           d S )Na(  
            <table>
                <tr>
                    <td rowspan="2">A</td>
                    <td rowspan="2" colspan="3">B</td>
                    <td>C</td>
                </tr>
                <tr>
                    <td>D</td>
                </tr>
            </table>
        r   rl  )rK  rL  rL  rL  D)rK  rL  zB.1zB.2r  rM  rN  rO  s       r   test_colspan_rowspan_both_not_1z,TestReadHtml.test_colspan_rowspan_both_not_1  s     "!  
 
 
  !$ +++,6S6S6S
 
 
 	fh/////r    c                      |t          d          d          d         }t          ddggddg          }t          j        ||           d S )Nz
            <table>
                <tr>
                    <td>A</td>
                    <td rowspan="2">B</td>
                </tr>
                <tr>
                    <td>C</td>
                </tr>
            </table>
        r   rl  r  rL  rK  rM  rN  rO  s       r   test_rowspan_at_end_of_rowz'TestReadHtml.test_rowspan_at_end_of_row  ss     "!
  
 
 
 " C:,c
CCC
fh/////r    c                      |t          d          d          d         }t          ddgddggddg          }t          j        ||           d S )Nz
            <table>
                <tr>
                    <td rowspan="3">A</td>
                    <td rowspan="3">B</td>
                </tr>
            </table>
        r   rl  rK  rL  rM  rN  rO  s       r   test_rowspan_only_rowsz#TestReadHtml.test_rowspan_only_rows  sz     "!	 	 
 
 
  C:Sz":S#JOOO
fh/////r    c                      |t          d                    d         }t          ddgddggddgddgg          }t          dd	gg|
          }t          j        ||           d S )Nam  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <th>a</th>
                    <th>b</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </table>
        r   rK  rL  r   r   r   levelscodesrj   rM  r   r   r   r-   r.   r`   rZ   r   rq   r   s        r   +test_header_inferred_from_rows_with_only_thz8TestReadHtml.test_header_inferred_from_rows_with_only_th  s    !! 
 
& '* c3Z#s$<aVaQRVDTUUUAq6(G<<<
fh/////r    c                 J   t          dt          dd          i          }|                                } |t          |          dgd          }t	          j        ||d                     |t          |          dgd          }t	          j        ||d                    d S )Ndate1/1/2001
   periodsr   r   parse_datesrS   )r   r   r   r   r-   r.   )r`   rZ   r   r   r   s        r   test_parse_dates_listz"TestReadHtml.test_parse_dates_list  s    
:r B B BCDD::<<x11saPPP
b#a&)))x11xSTUUU
b#a&)))))r    c                 p   t          t          dd                    }t          |                    d           |                    d           d          } |t	          |                                          ddd	gid
          }t          d|i          }t          j        ||d                    d S )Nr  r  r  c                 D    t          |                                           S r#   )strr  r&   s    r   r(   z7TestReadHtml.test_parse_dates_combine.<locals>.<lambda>%      AFFHH r    c                 D    t          |                                           S r#   )r  timer  s    r   r(   z7TestReadHtml.test_parse_dates_combine.<locals>.<lambda>&  r  r    )r  r  datetimer   rj   r  r   )r   r   r   r+   r   r   r-   r.   )r`   rZ   	raw_datesr   r   newdfs         r   test_parse_dates_combinez%TestReadHtml.test_parse_dates_combine!  s    :j"===>>	!&=&=>>!&=&=>> 
 
 RZZ\\""aV0DPQ
 
 
 :y122
eSV,,,,,r    c                     |dddd          }t           j                            |          sJ t          |           d            t           j                            |          sJ t          |           d             ||dd	          d
         }|j        dk    sJ d|j        d         v sJ |d         j        t          j        d          k    sJ t          j	        |j
        d         d          sJ d S )Nr   r   r?   wikipedia_states.htmlz is not a filez is an empty fileArizonar   r   r   )<      Unnamedr   sq mifloat64)r   r  HzPN$A)r  r  isfilereprgetsizer}  rq   ro   ru   allcloselocr`   r   rZ   r   r   s        r   test_wikipedia_states_tablez(TestReadHtml.test_wikipedia_states_table/  s   xff.EFFw~~d##BBT

%B%B%BBBBwt$$FFd&F&F&FFFF!!$iBBB1E|x''''FN2.....g$(;(;;;;;{6:j19=======r    c                      |dddd          } ||dd          d         }|j         dk    sJ d	|j        d
         d         v sJ |j        j        dk    sJ t          j        |j        d         d          sJ d S )Nr   r   r?   r  r  r   r   )r     r  r   r   rj   )Alaska)zTotal area[2]r  r  )r}  rq   nlevelsru   r  r  r  s        r    test_wikipedia_states_multiindexz-TestReadHtml.test_wikipedia_states_multiindex9  s    xff.EFF!!$i1EEEaH|x''''FN2.q11111~%****{6:&JKYWWWWWWWr    c                      |t          d          ddg          }t          ddggt          j        ddg          	          }t	          j        |d         |           d S )
NaK  
                <table>
                    <thead>
                        <tr><th></th><th></tr>
                        <tr><th>A</th><th>B</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>a</td><td>b</td></tr>
                    </tbody>
                </table>
            r   r   rl  r   r   )Unnamed: 0_level_0rK  )zUnnamed: 1_level_0rL  rp   )r   r   r   from_tuplesr-   r.   rO  s       r   %test_parser_error_on_empty_header_rowz2TestReadHtml.test_parser_error_on_empty_header_rowA  s    !!
  q6
 
 
  3ZL*,.IJ 
 
 
 	fQi22222r    c                      |t          d          d          d         }t          ddidg          }|d         j        t          j        d          k    sJ t	          j        ||           d S )	Na  <html>
            <body>
             <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1100#101</td>
                    </tr>
                </tbody>
            </table>
            </body>
        </html>#)decimalr   rR  gClg0@rT  r  )r   r   ro   ru   r-   r.   rO  s       r   test_decimal_rowszTestReadHtml.test_decimal_rowsZ  s    !! $ '
 
 
( ), 8X"6qcBBBh%))<)<<<<<
fh/////r    argTFc                     t          j        d          }t          j        t          |          5   |||           d d d            d S # 1 swxY w Y   d S )NzPassing a bool to header is invalid. Use header=None for no header or header=int or list-like of ints to specify the row(s) making up the column namesr=   rl  )r4  escaperC   rF   r   )r`   re   r  rZ   r4   s        r   test_bool_header_argz!TestReadHtml.test_bool_header_argw  s     i
 

 ]9C000 	4 	4Ys3333	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4s   AAAc                      |t          d          dt          i          d         }t          dddgi          }t          j        ||           d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                    </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>r   ry  r   z0.763z0.244)r   r  r   r-   r.   rO  s       r   test_converterszTestReadHtml.test_converters  sp    !! " Sz%
 
 
& '* cGW#5677
fh/////r    c                      |t          d          dg          d         }t          ddt          j        gi          }t	          j        ||           d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                   </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>gZd;?)	na_valuesr   r   g"~j?r   r   ru   r   r-   r.   rO  s       r   test_na_valueszTestReadHtml.test_na_values  so    !! " g%
 
 
& '* cE26?344
fh/////r    c                 R   d}t          dddgi          } |t          |          d          d         }t          j        ||           t          dt          j        t          j        gi          } |t          |          d          d         }t          j        ||           d S )	Na  <table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> N/A</td>
                            </tr>
                            <tr>
                            <td> NA</td>
                            </tr>
                        </tbody>
                    </table>r   zN/Ar
   F)keep_default_nar   T)r   r   r-   r.   ru   r   )r`   rZ   	html_dataexpected_dfhtml_dfs        r   test_keep_default_naz!TestReadHtml.test_keep_default_na  s     	   udm 455""8I#6#6NNNqQ
k7333rvrv&6 788""8I#6#6MMMaP
k733333r    c                      |t          d                    d         }t          ddgt          j        t          j        ggddg          }t	          j        ||           d S )Nak  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                </tr>
                <tr>
                    <td></td>
                    <td></td>
                </tr>
            </table>
        r   r   r   rK  rL  rM  r  rO  s       r   test_preserve_empty_rowsz%TestReadHtml.test_preserve_empty_rows  st    !! 
 
& '* C:/?"@3PS*UUU
fh/////r    c                      |t          d                    d         }t          ddgddggddgddgg          }t          dd	gg|
          }t          j        ||           d S )NaU  
            <table>
                <thead>
                    <tr><th></th><th></tr>
                    <tr><th>A</th><th>B</th></tr>
                    <tr><th>a</th><th>b</th></tr>
                </thead>
                <tbody>
                    <tr><td>1</td><td>2</td></tr>
                </tbody>
            </table>
        r   rK  rL  r   r   r   r  rj   rM  r  r  s        r   ,test_ignore_empty_rows_when_inferring_headerz9TestReadHtml.test_ignore_empty_rows_when_inferring_header  s    !! 
 
 " c3Z#s$<aVaQRVDTUUUAq6(G<<<
fh/////r    c                     t          g d          }g dg dg|_        |                    d          } |t          |                    d         }t	          j        ||           d S )N))HillaryD   r  )BernieJ   r  )DonaldE   R)r   )r  AgeParty)NamezUnnamed: 1_level_1zUnnamed: 2_level_1Fr   r   )r   rq   r   r   r-   r.   )r`   rZ   r  r?   r  s        r   test_multiple_header_rowsz&TestReadHtml.test_multiple_header_rows  s    QQQ
 
 
 322@@@
 """//""8D>>2215
k733333r    c                      |dddd          } ||d          }t          |t                    sJ t          |d         t                    sJ d S )Nr   r   r?   rR   r   r  )r%   rz   r   )r`   r   rZ   rT   r   s        r   test_works_on_valid_markupz'TestReadHtml.test_works_on_valid_markup  sc    8D&&2EFFx1555#t$$$$$#a&),,,,,,,r    c                 B     |dddd          } ||dddg           d S )	Nr   r   r?   rg   r   rQ   r;   rL   rc   )r`   r   rZ   rh   s       r   test_fallback_successz"TestReadHtml.test_fallback_success  s:     vvGGk6:BVWWWWWWr    c                     t          dd          }t          t          j                            d                              d          |          }|                                }d|v sJ d S )Nz
2000-01-01r  r  rj   )r  rk   r   )r   r   ru   rv   rw   standard_normalr   )r`   rngr   r   s       r   test_to_html_timestampz#TestReadHtml.test_to_html_timestamp  si    r222ry,,Q//??HHPSTTTv%%%%%%r    c                    t          dddg          }|                                }|                    d          }|                    d          }|                    d          }|                    d          }|                    d          }d|v sJ ||k    sJ ||k    sJ ||k    sJ d	|v sJ d
|vsJ d|vsJ ||k    sJ d S )Nr   rj   rK  rL  T)borderr   Fz border="1"z border="2"z border="0"z border)r   r   )r`   r   out_border_defaultout_border_trueout_border_explicit_defaultout_border_nondefaultout_border_zeroout_border_falses           r   test_to_html_borderlessz$TestReadHtml.test_to_html_borderless"  s   aa(()**ZZ\\**D*11&(jjj&:&:# "

!
 4 4**A*..::U:33 22222"44444!%@@@@@!%::::: 55555O3333 00000"2222222r    zdisplayed_only,exp0,exp1fooNzfoo  bar  baz  quxc                     d} |t          |          |          }t          j        |d         |           |t          j        |d         |           d S t          |          dk    sJ d S )Na  <html>
          <body>
            <table>
              <tr>
                <td>
                  foo
                  <span style="display:none;text-align:center">bar</span>
                  <span style="display:none">baz</span>
                  <span style="display: none">qux</span>
                </td>
              </tr>
            </table>
            <table style="display: none">
              <tr>
                <td>foo</td>
              </tr>
            </table>
          </body>
        </html>displayed_onlyr   r   )r   r-   r.   r)   )r`   r  exp0exp1rZ   r   r   s          r   test_displayed_onlyz TestReadHtml.test_displayed_only5  sw    ( x~~nMMM
c!fd+++!#a&$/////s88q======r    r  c                     d} |t          |          |          d         }t          ddgddgd          }t          j        ||           d S )	NaW  
        <table>
            <tr>
                <th>A</th>
                <th>B</th>
            </tr>
            <tr>
                <td>1</td>
                <td>2</td>
            </tr>
            <tr>
                <td><span style="display:none"></span>4</td>
                <td>5</td>
            </tr>
        </table>
        r  r   r   rk   rj   r   r  rN  )r`   r  rZ   
html_tabler   r   s         r   &test_displayed_only_with_many_elementsz3TestReadHtml.test_displayed_only_with_many_elementsZ  sh    
  "!(:"6"6~VVV
 Aq6A7788
fh/////r    z\ignore:You provided Unicode markup but also provided a value for from_encoding.*:UserWarningc                 F   t           j                            |          }t           j                            |          d         }|                    d          \  }}	 t          |d          5 } ||                                |d                                          }d d d            n# 1 swxY w Y   t          |d          5 } |t          |                                          |d                                          }	d d d            n# 1 swxY w Y    |||d                                          }
t          j
        ||	           t          j
        ||
           d S # t          $ r+ t                      rd|v sd|v rt          j                      w xY w)Nr   _rb)r   rS   1632)r  r  basenamesplitextsplitr   r   popr   r-   r.   	Exceptionr	   rC   skip)r`   r   rZ   	base_pathrootr  r   fobjfrom_stringfrom_file_likefrom_filenames              r   test_encodezTestReadHtml.test_encoder  s&   
 G$$%788	w	**1-jjoo8	($// 4..IIKK(a  #%%               
 ($// 4!1!1DIIKK((8q" " "#%%               
 -,"X  cee  !+~>>>!+}===== 	 	 	"$$ "8##tx'7'7KMMM	sV   E+ .3B-!E+ -B11E+ 4B15E+ A DE+ DE+ DAE+ +5F c                 >   |j                             d          dk    rt          j        d            G d dt                    } |d          } ||          sJ t          j        t          d          5   ||           d d d            d S # 1 swxY w Y   d S )	NrB   rQ   zNot applicable for lxmlc                       e Zd Zd ZdS )FTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIOc                     dS NFrc   r;  s    r   seekablezOTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIO.seekable  s    ur    N)__name__
__module____qualname__r+  rc   r    r   UnseekableStringIOr(    s#            r    r/  z?
            <table><tr><td>spam<foobr />eggs</td></tr></table>z#passed a non-rewindable file objectr=   )keywordsgetrC   r  r   rF   rM   )r`   rZ   r/  bads       r   test_parse_failure_unseekablez*TestReadHtml.test_parse_failure_unseekable  s    $((22f<<K1222	 	 	 	 	 	 	 	 ! B
 

  $$$$$]:-RSSS 	" 	"S!!!	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"s   9BBBc                      G d d          } |d          } |d          } ||          sJ  ||          sJ d S )Nc                   <    e Zd Zd	dZd
dZd Zd Zd ZdefdZ	dS )9TestReadHtml.test_parse_failure_rewinds.<locals>.MockFilereturnNc                 "    || _         d| _        d S r*  )r   at_end)r`   r   s     r   __init__zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__init__  s     	#r    c                 4    | j         rdn| j        }d| _         |S )Nrc  T)r9  r   )r`   sizer   s      r   r   z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.read  s     ![7rrdi"r    c                     d| _         d S r*  )r9  )r`   offsets     r   seekz>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seek  s    #r    c                     dS )NTrc   r;  s    r   r+  zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekable  s    tr    c                     d S r#   rc   r;  s    r   __next__zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__next__  s    r    c                     | S r#   rc   r;  s    r   __iter__zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__iter__  s	     r    )r7  Nr#   )
r,  r-  r.  r:  r   r?  r+  rB  r   rD  rc   r    r   MockFiler6    s        $ $ $ $   
$ $ $    (      r    rE  z/<table><tr><td>spam<br />eggs</td></tr></table>z2<table><tr><td>spam<foobr />eggs</td></tr></table>rc   )r`   rZ   rE  goodr2  s        r   test_parse_failure_rewindsz'TestReadHtml.test_parse_failure_rewinds  s    	 	 	 	 	 	 	 	2 xIJJhKLL%%%%%$$$$$$$r    c                     G d dt           j                  } |dddd          } |||f          } |||f          }|                                 |                                 |                                s|                                r)	 |                                |                                )d |j        cxu r	|j        u sn J d S )Nc                        e Zd Z fdZ xZS )@TestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThreadc                     	 t                                                       d | _        d S # t          $ r}|| _        Y d }~d S d }~ww xY wr#   )superrunerrr  )r`   rN  	__class__s     r   rM  zDTestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThread.run  sZ    $GGKKMMM  $DHHH ! # # #"DHHHHHHH#s    , 
AAA)r,  r-  r.  rM  __classcell__)rO  s   @r   ErrorThreadrJ    s8        $ $ $ $ $ $ $ $ $r    rQ  r   r   r?   rR   )targetr2   )	threadingThreadstartis_aliverN  )r`   r   rZ   rQ  rT   helper_thread1helper_thread2s          r   test_importcheck_thread_safetyz+TestReadHtml.test_importcheck_thread_safety  s   
	$ 	$ 	$ 	$ 	$)* 	$ 	$ 	$ 8D&&2EFF$,<H;OOO$,<H;OOO%%'' 	>+B+B+D+D 	 %%'' 	>+B+B+D+D 	~)????^-?????????r    c                      |dddd          }t          |          } ||          d         } ||          d         }t          j        ||           d S )Nr   r   r?   r@   r   )r   r-   r.   )r`   r   rZ   file_path_string	file_pathr   r   s          r   test_parse_path_objectz#TestReadHtml.test_parse_path_object  sj    #8D&&+FF)**	/003y))!,
c3'''''r    c                      |t          d                    d         }t          dggdg          }t          j        ||           d S )Nz
            <table>
                <tr>
                    <th>A</th>
                </tr>
                <tr>
                    <td>word1<br>word2</td>
                </tr>
            </table>
        r   zword1 word2rK  rM  rN  rO  s       r   test_parse_br_as_spacez#TestReadHtml.test_parse_br_as_space  sd    !!	 
 
  M?"3cUCCC
fh/////r    )r*   bodyr   rd  c                    d}g dg dg dg dg dg dd}|d	         }|d
         }|d         }|dk    r|d         }|d         }|d         }n,|dk    r	|d         }n|dk    r	|d         }n|dk    r|d         } |t          |          |          d         }t          ||g|          }	|	                    t          j                  }	t          j        ||	           d S )Na  
          <table>
            <tr>
              <th>HTTP</th>
              <th>FTP</th>
              <th><a href="https://en.wiktionary.org/wiki/linkless">Linkless</a></th>
            </tr>
            <tr>
              <td><a href="https://en.wikipedia.org/">Wikipedia</a></td>
              <td>SURROUNDING <a href="ftp://ftp.us.debian.org/">Debian</a> TEXT</td>
              <td>Linkless</td>
            </tr>
            <tfoot>
              <tr>
                <td><a href="https://en.wikipedia.org/wiki/Page_footer">Footer</a></td>
                <td>
                  Multiple <a href="1">links:</a> <a href="2">Only first captured.</a>
                </td>
              </tr>
            </tfoot>
          </table>
          )HTTPFTPLinkless))rb  N)rc  N)rd  z'https://en.wiktionary.org/wiki/linkless)	WikipediaSURROUNDING Debian TEXTrd  ))re  zhttps://en.wikipedia.org/)rf  zftp://ftp.us.debian.org/)rd  N)Footer$Multiple links: Only first captured.N))rg  z)https://en.wikipedia.org/wiki/Page_footer)rh  1N)head_ignorehead_extractbody_ignorebody_extractfooter_ignorefooter_extractrl  rn  rj  r*   rm  ro  rk  r`  rd  r   extract_linksr   rp   )r   r   fillnaru   r   r-   r.   )
r`   r  rZ   gh_13141_datagh_13141_expecteddata_expfoot_exphead_expr   r   s
             r   test_extract_linkszTestReadHtml.test_extract_links  sV   0 766  
 POO  
  
  %
 
2 %]3$_5$]3%<<(8H()9:H(8HHF]](8HHH__()9:HHH__(8H!!(="9"9MMMaPh18DDD??26**
fh/////r    c                     d}t          j        t          |          5  t          |d           d d d            d S # 1 swxY w Y   d S )NzY`extract_links` must be one of {None, "header", "footer", "body", "all"}, got "incorrect"r=   	incorrectrp  rC   rF   rM   r   )r`   re   r4   s      r   test_extract_links_badz#TestReadHtml.test_extract_links_badC  s    I 	 ]:S111 	< 	<i{;;;;	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	<   =AAc                     d} |t          |          d          d         }t          dgg          }t          j        ||           d S )Nz
        <table>
          <tr>
            <td>
              <a href='https://google.com'>Google.com</a>
            </td>
          </tr>
        </table>
        r*   rp  r   )z
Google.comzhttps://google.comrN  r`   rZ   r   r   r   s        r    test_extract_links_all_no_headerz-TestReadHtml.test_extract_links_all_no_headerK  sX     "!(4..FFFqICDEFF
fh/////r    c                     d}t          j        t          |          5  t          dd           d d d            d S # 1 swxY w Y   d S )NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.r=   testnumpyr   r{  )r`   r4   s     r   test_invalid_dtype_backendz'TestReadHtml.test_invalid_dtype_backendZ  s    % 	 ]:S111 	5 	5fG4444	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5r}  c                     d} |t          |                    d         }t          ddgddggddg	          }t          j        ||           d S )
Na  
        <table>
            <tr>
                <th>
                    <style>.style</style>
                    A
                    </th>
                <th>B</th>
            </tr>
            <tr>
                <td>A1</td>
                <td>B1</td>
            </tr>
            <tr>
                <td>A2</td>
                <td>B2</td>
            </tr>
        </table>
        r   A1B1A2B2rK  rL  rM  rN  r  s        r   test_style_tagzTestReadHtml.test_style_tagb  sb    & "!(4..11!4D$<$">c
SSS
fh/////r    )dr,  r-  r.  ra   rC   fixturere   rh   r   r   marknetwork
single_cpur   r   slowr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r	  r  r  r  r  r  r   r%  r'  r)  r,  r/  r1  r6  r9  r<  r?  rG  rI  rP  rU  r]  rj  rp  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  parametrizer  r  r  r  r  r  r  r  r  r  r	  r   r  r  filterwarningsr%  r3  rG  rY  r]  r_  rx  r|  r  r  r  rc   r    r   r\   r\   p   sl         @ ^; ; ^; ^? ? ^?' ' ':0 :0 :0x [[) )  )  [[) )  ) [) ) )/ / /- - -
- - -
  
) ) )) ) )) ) )) ) )) ) )) ) )) ) )) ) )L L L) ) )
) ) )
) ) )
) ) )	) 	) 	)) ) )) ) ) [[D D  D
 [[[@ @   @
 [- - - [   [2 2 2 [0 0 0 [	0 	0 	0 [2 2 2 [2 2 2 [
2 
2 
2 [	- 	- 	-D D D
 ^. . ^.` [[   
 [[. .  .     60 0 0@0 0 040 0 0@$2 $2 $2L0 0 02 [-0 -0 -0^ [	$ 	$ 	$F0 F0 F0P0 0 02!0 !0 !0F0 0 0@0 0 0:0 0 0*0 0 08* * *- - -> > >X X X3 3 320 0 0: [UT5M224 4 3240 0 060 0 064 4 420 0 040 0 0.4 4 4- - - [X X X
& & &3 3 3& ["99eW%%t,II3455yy%7I7IJ	
 ! ! !< [-e}==0 0 >=0. [	&  	 8" " "( %  %  %D [[@ @  @.( ( (0 0 0* [U$G$G$GHHB0 B0 IHB0H< < <0 0 05 5 50 0 0 0 0r    r\   )4collections.abcr   	functoolsr   r   r   r   r  pathlibr   r4  rS  urllib.errorr   r  ru   rC   pandas.compatr	   pandas.util._test_decoratorsutil_test_decoratorstdpandasrx   r
   r   r   r   r   r   r   r   r   pandas._testing_testingr-   pandas.core.arraysr   r   pandas.io.commonr   r  r   r8   rI   rO   rW   r   
skip_if_norZ   r\   rc   r    r   <module>r     s   $ $ $ $ $ $              
			       				     ! ! ! ! ! !      - - - - - - ) ) ) ) ) ) ) ) )    
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
             
 . - - - - -     B B B
: : :(M M M@ @ @. . . U=2=#7#7z9R9R"STTTV=2=#8#8999  4 4 4I0 I0 I0 I0 I0 I0 I0 I0 I0 I0r    