unfluff
Version:
A web page content extractor
12 lines • 1.11 kB
JSON
{
"url": "http://arabic.cnn.com/2013/middle_east/8/3/syria.clashes/index.html",
"expected": {
"meta_description": "",
"domain": "arabic.cnn.com",
"final_url": "http://arabic.cnn.com/2013/middle_east/8/3/syria.clashes/index.html",
"meta_keywords": "",
"cleaned_text": "\u062f\u0645\u0634\u0642\u060c \u0633\u0648\u0631\u064a\u0627 (CNN) -- \u0623\u0643\u062f\u062a \u062c\u0647\u0627\u062a \u0633\u0648\u0631\u064a\u0629 \u0645\u0639\u0627\u0631\u0636\u0629 \u0623\u0646 \u0641\u0635\u0627\u0626\u0644 \u0645\u0633\u0644\u062d\u0629 \u0645\u0639\u0627\u0631\u0636\u0629 \u0644\u0646\u0638\u0627\u0645 \u0627\u0644\u0631\u0626\u064a\u0633 \u0628\u0634\u0627\u0631 \u0627\u0644\u0623\u0633\u062f \u0648\u0639\u0644\u0649 \u0635\u0644\u0629 \u0628\u0640\"\u0627\u0644\u062c\u064a\u0634 \u0627\u0644\u062d\u0631\" \u062a\u0645\u0643\u0646\u062a \u0645\u0646 \u0627\u0644\u0633\u064a\u0637\u0631\u0629 \u0639\u0644\u0649 \u0645\u0633\u062a\u0648\u062f\u0639\u0627\u062a \u0644\u0644\u0623\u0633\u0644",
"meta_favicon": "",
"meta_lang": "ar"
}
}