UNPKG

html-metadata

Version:

Scrapes metadata of several different standards

181 lines (151 loc) 8.34 kB
<html lang="en" dir="ltr">
  <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# article: http://ogp.me/ns/article#">
    <meta charset="utf-8">
    <!--
      Sample article page that repeats the same turtle-themed metadata in many
      formats: general meta/link tags, Open Graph (og:, article:), AL (al:),
      Twitter, BE Press, Dublin Core, EPrints, Highwire Press and PRISM in the
      head; COinS, microdata and JSON-LD in the body.
      Some entries are deliberately redundant or malformed; the inline comments
      next to them say which ones. Keep those quirks intact when editing.
    -->

    <!--General metadata-->
    <title>Turtles are AWESOME!!1 | Awesome Turtles Website</title>
    <meta name="author" content="Turtle Lvr">
    <meta name="robots" content="we welcome our robot overlords"/>
    <meta name="description" content="Exposition on the awesomeness of turtles"/>
    <meta name="keywords" content="turtles, are, awesome" />
    <link rel="apple-touch-icon" href="turtleapple.png" sizes="72x72">
    <link rel="icon" href="turtle.png" sizes="18x18" type="image/png">
    <link rel="apple-touch-icon" href="turtleapple2.png" sizes="">
    <link rel="icon" href="turtle2.png" sizes="" type="image/png">
    <link rel="canonical" href="http://example.com/turtles" />
    <link rel="publisher" href="https://mediawiki.org"/>
    <link rel="author" href="http://examples.com/turtlelvr"/>
    <link rel="shortlink" href="http://example.com/c" />

    <!--Open Graph-->
    <meta property="og:locale" content="en_US" />
    <meta property="og:type" content="article" />
    <meta property="og:title" content="Turtles are AWESOME!!1" />
    <meta property="og:description" content="Exposition on the awesomeness of turtles" />
    <meta property="og:url" content="http://example.com" />
    <meta property="og:site_name" content="Awesome Turtles Website" />

    <!--Image subproperty tags with no root -->
    <meta property="og:image:width" content="666" /> <!--Ignored-->
    <meta property="og:image:height" content="666" /> <!--Ignored-->

    <meta property="og:image" content="http://example.com/turtle.jpg" />
    <meta property="og:image:secure_url" content="https://secure.example.com/turtle.jpg" />
    <meta property="og:image:type" content="image/jpeg" />
    <meta property="og:image:width" content="400" />
    <meta property="og:image:width" content="666" /> <!--Ignored-->
    <meta property="og:image:height" content="300" />

    <meta property="og:image" content="http://example.com/shell.jpg" />
    <!--Interrupt image tags with audio tags-->
    <meta property="og:audio" content="http://example.com/sound.mp3" />
    <meta property="og:audio:secure_url" content="https://secure.example.com/sound.mp3" />
    <meta property="og:audio:type" content="audio/mpeg" />
    <!--End interruption with audio tags -->
    <meta property="og:image:width" content="200" />
    <meta property="og:image:height" content="150" />

    <!--Invalid subproperty-->
    <meta property="og:cat:meow" content="purr" /><!--Ignored-->

    <!--Article vertical-->
    <meta property="article:tag" content="turtles" />
    <meta property="article:tag" content="are" />
    <meta property="article:tag" content="awesome" />
    <meta property="article:section" content="Turtles are tough" />
    <meta property="article:section" content="Turtles are flawless" />
    <meta property="article:section" content="Turtles are cute" />
    <meta property="article:published_time" content="2012-02-04T12:00:00+00:00" />
    <meta property="article:modified_time" content="2015-01-14T19:14:27+00:00" />
    <meta property="article:author" content="http://examples.com/turtlelvr" />
    <meta property="article:publisher" content="http://mediawiki.org" />

    <!--AL-->
    <meta property="al:ios:url" content="turtle://">
    <meta property="al:ios:app_store_id" content="000">
    <meta property="al:android:url" content="turtle://">
    <meta property="al:android:package" content="superturtlearticle.androidapp">
    <meta property="al:web:url" content="http://example.com/">
    <meta property="al:web:should_fallback" content="true">

    <!--Twitter-->
    <meta name="twitter:card" content="summary">
    <meta name="twitter:site" content="@Turtlessssssssss">
    <meta name="twitter:creator" content="@Turtlessssssssss">
    <meta name="twitter:creator" content="@Turtlezzzzzzzzzz">
    <meta name="twitter:url" content="http://www.example.com/turtles">
    <meta name="twitter:title" content="Turtles are AWESOME!!1">
    <meta name="twitter:description" content="Exposition on the awesomeness of turtles">
    <meta name="twitter:image" content="http://example.com/turtles.jpg">
    <meta name="twitter:image:alt" content="It's a bunch of turtles!">
    <meta name="twitter:app:url:iphone" content="turtle://">
    <meta name="twitter:app:id:iphone" content="000">
    <meta name="twitter:app:url:googleplay" content="turtle://">
    <meta name="twitter:app:id:googleplay" content="superturtlearticle.androidapp">

    <!--BE Press-->
    <meta name="bepress_citation_series_title" content="Turtles" >
    <meta name="bepress_citation_author" content="Turtle Lvr" />
    <meta name="bepress_citation_author_institution" content="Mediawiki" />
    <meta name="bepress_citation_title" content="Turtles are AWESOME!!1" >
    <meta name="bepress_citation_date" content="2012" />
    <meta name="bepress_citation_pdf_url" content="http://www.example.com/turtlelvr/pdf" />
    <meta name="bepress_citation_abstract_html_url" content="http://www.example.com/turtlelvr" />
    <meta name="bepress_citation_publisher" content="Turtles Society" />
    <meta name="bepress_citation_online_date" content="2012/02/04" />

    <!--Dublin Core-->
    <meta name="dc.title" content="Turtles are AWESOME!!1" >
    <meta name="dc.creator" content="http://www.example.com/turtlelvr" >
    <meta name="dc.description" content="Exposition on the awesomeness of turtles" >
    <meta name="dc.date" content="2012-02-04 12:00:00" >
    <meta name="dc.type" content="Text.Article" >

    <!--EPrints-->
    <meta name="eprints.title" content="Turtles are AWESOME!!1" >
    <meta name="eprints.creators_name" content="http://www.example.com/turtlelvr" >
    <meta name="eprints.abstract" content="Exposition on the awesomeness of turtles" >
    <meta name="eprints.datestamp" content="2012-02-04 12:00:00" >
    <meta name="eprints.type" content="article" >

    <!--Highwire Press-->
    <meta name="citation_journal_title" content="Turtles" >
    <meta name="citation_issn" content="1234-5678" >
    <meta name="citation_doi" content="10.1000/123" >
    <meta name="citation_publication_date" content="2012-02-04" >
    <meta name="citation_title" content="Turtles are AWESOME!!1" >
    <meta name="citation_author" content="Turtle Lvr" />
    <meta name="citation_author_institution" content="Mediawiki" />
    <meta name="citation_volume" content="150" />
    <meta name="citation_issue" content="1" />
    <meta name="citation_firstpage" content="123" />
    <meta name="citation_lastpage" content="456" />
    <meta name="citation_publisher" content="Turtles Society" />
    <meta name="citation_abstract" content="Exposition on the awesomeness of turtles." />

    <!--PRISM-->
    <meta name="prism.issn" content="1234-5678" >
    <meta name="prism.publicationName" content="Turtles Society" >
    <meta name="prism.publicationDate" content="2012-02-04" >
    <meta name="prism.startingPage" content="123" >
    <meta name="prism.copyright" content="2012 Turtles Society" >
    <meta name="prism.rightsAgent" content="permissions@turtles.com" >
    <meta name="prism.url" content="https://www.turtles.com" >
    <meta name="prism.doi" content="10.1000/123" >
  </head>
  <body>
    <!--COINS-->
    <span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_id=info%3Adoi%2Fhttp%3A%2F%2Fdx.doi.org%2F10.5555%2F12345678&amp;rfr_id=info%3Asid%2Fcrossref.org%3Asearch&amp;rft.atitle=Toward+a+Unified+Theory+of+High-Energy+Metaphysics%3A+Silly+String+Theory&amp;rft.jtitle=Journal+of+Psychoceramics&amp;rft.date=2008&amp;rft.volume=5&amp;rft.issue=11&amp;rft.spage=1&amp;rft.epage=3&amp;rft.aufirst=Josiah&amp;rft.aulast=Carberry&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.au=Josiah+Carberry"></span>

    <!--Schema.org Microdata-->
    <div itemscope>
      <h1 itemprop="headline">Turtles are AWESOME!!1</h1> <!--Uses text from inside tags-->
      <span itemprop="wordCount" content="10" /> <!--Self-closing tag-->
      <span itemprop="author" content="Turtle Lvr">Turtle H8r</span> <!--Prefers content attr to text inside tags-->
      <a href="http://www.archive.org/turtlearticle" itemprop="archivedAt">Turtle Article Archive</a> <!--Uses href and not text between tags-->
    </div>

    <!--Valid JSON-LD-->
    <script type="application/ld+json">{
      "@context": "http://schema.org",
      "@type": "Organization",
      "url": "https://www.turtles.com"
    }
    </script>

    <!-- Invalid JSON-LD: ignored -->
    <script type="application/ld+json">
    {
      "@id": "https://www.turtles.com/"
      "potentialAction" / {
        "@type": "ViewAction",
        "target": "android-app://com.turtles/"
      },
      "@type": "WebPage",
      "@context": "http://schema.org"
    }
    </script>
  </body>
</html>