Tmall网页采集数据格式化分析类
代码使用 simple_html_dom 类库,可以在 GitHub 上搜索获取。它的使用方法很简单,功能非常强大,可以按照 ID、标签、Class、属性等对数据进行解析。
/**
 * Formats product data scraped from a Tmall web page.
 *
 * The class searches a parsed HTML object (anything exposing a
 * find($selector[, $index]) method; per the accompanying description this is
 * presumably a simple_html_dom document or node) and extracts the product
 * title, shop, price and image.
 *
 * Call setObj() before getProductInfo(): $obj starts out as an empty string,
 * and find() cannot be called on a string.
 */
class TmallHtmlFormat
{
    /** @var mixed Parsed HTML object to search; an empty string until setObj() is called. */
    public $obj = "";

    // The properties below are neither read nor written by the methods of
    // this class; they are kept so outside code that touches them keeps working.
    public $productTitle = "";
    public $productImgHref = "";
    public $productImg = "";
    public $productShop = "";
    public $productPrice = "";

    /**
     * Sets the parsed HTML object that every getter searches.
     *
     * @param mixed $obj Object exposing find($selector[, $index]).
     * @return void
     */
    public function setObj($obj)
    {
        $this->obj = $obj;
    }

    /**
     * Collects all product fields in one array.
     *
     * @return array Keys 'title', 'shop', 'price' and 'img'. 'shop' and 'img'
     *               are two-element arrays; every field is null when its
     *               wrapper element is not found.
     */
    public function getProductInfo()
    {
        return array(
            'title' => $this->getTitle(),
            'shop'  => $this->getShop(),
            'price' => $this->getPrice(),
            'img'   => $this->getProductImg(),
        );
    }

    /**
     * Product title: the `title` property of the first link inside the first
     * `p.productTitle` element.
     *
     * @return mixed The title, or null when the element or its link is missing.
     */
    private function getTitle()
    {
        $wrapper = $this->firstMatch('p.productTitle');
        if ($wrapper === null) {
            return null;
        }

        return $this->readProperty($wrapper->find('a', 0), 'title');
    }

    /**
     * Shop name and shop link, both taken from the first link inside the
     * first `div.productShop` element.
     *
     * @return array|null array(plaintext, href), or null when there is no
     *                    `div.productShop`. Both entries are null when the
     *                    wrapper has no link.
     */
    private function getShop()
    {
        $wrapper = $this->firstMatch('div.productShop');
        if ($wrapper === null) {
            return null;
        }

        // Query the link once; both values come from the same element.
        $link = $wrapper->find('a', 0);

        return array(
            $this->readProperty($link, 'plaintext'),
            $this->readProperty($link, 'href'),
        );
    }

    /**
     * Price: the `title` property of the first `em` inside the first
     * `p.productPrice` element.
     *
     * @return mixed The price, or null when the element or its `em` is missing.
     */
    private function getPrice()
    {
        $wrapper = $this->firstMatch('p.productPrice');
        if ($wrapper === null) {
            return null;
        }

        return $this->readProperty($wrapper->find('em', 0), 'title');
    }

    /**
     * Product image source and the link target, read from the first `img`
     * and the first `a` inside the first `div.productImg-wrap` element.
     *
     * @return array|null array(src, href), or null when there is no
     *                    `div.productImg-wrap`. An entry is null when its
     *                    inner element is missing.
     */
    private function getProductImg()
    {
        $wrapper = $this->firstMatch('div.productImg-wrap');
        if ($wrapper === null) {
            return null;
        }

        return array(
            $this->readProperty($wrapper->find('img', 0), 'src'),
            $this->readProperty($wrapper->find('a', 0), 'href'),
        );
    }

    /**
     * Returns the first element matching $selector in the current object.
     *
     * @param string $selector Selector passed to find().
     * @return mixed The first match, or null when find() returns an empty
     *               array or a non-array value.
     */
    private function firstMatch($selector)
    {
        $matches = $this->obj->find($selector);
        if (is_array($matches) && $matches) {
            return $matches[0];
        }

        return null;
    }

    /**
     * Reads a property from a node that may be absent.
     *
     * Reading a property directly on a non-object raises a notice/warning;
     * this guard yields null for a missing node without the diagnostic.
     *
     * @param mixed  $node Node returned by find($selector, 0); may be null.
     * @param string $name Property to read (e.g. 'title', 'href').
     * @return mixed The property value, or null when $node is not an object.
     */
    private function readProperty($node, $name)
    {
        return is_object($node) ? $node->$name : null;
    }
}