您好,登錄後才能下訂單哦!
/*獲取URL范例*/ /* Find?查找獲取當前匹配的每個元素的后代 Eq?選擇第幾個 Attr?獲取對應的標簽屬性 AttrOr?獲取對應的標簽屬性。這個可以設置第二個參數。獲取的默認值?如果獲取不到默認調用對應默認值 Each?遍歷每一個元素 Text?獲取當前對應的文本 Html?獲取當前對象的標簽 AddClass?添加?class?不過用來抓取有點雞肋不知道為何要寫這個 Children?返回所有子元素 Filter?過濾標簽元素 Prev?獲取上一個元素 Next?獲取下一個元素 */ package?main import?( ????"fmt" ????"log" ????"os" ????"regexp" ????"strconv" ????"github.com/PuerkitoBio/goquery" ) func?getdata(ins?int,?ch?chan?int)?{ ????url?:=?"" ????if?ins?==?1?{ ????????url?=?"https://colobu.com/categories/Go" ????}?else?{ ????????url?=?"https://colobu.com/categories/Go/page/"?+?strconv.Itoa(ins)?+?"/" ????} ????doc,?err?:=?goquery.NewDocument(url) ????if?err?!=?nil?{ ????????log.Fatal(err) ????} ????//??<a?class="article-title"?href="/2019/06/01/packet-capture-injection-and-analysis-gopacket/">[譯]利用?gopackage?進行包的捕獲、注入和分析</a> ????doc.Find(".article-title").Each(func(i?int,?s?*goquery.Selection)?{ ????????a,?_?:=?s.Attr("href") ????????text?:=?s.Text() ????????a?=?"https://colobu.com"?+?a ????????//htmls,?_?:=?s.Html() ????????fmt.Println("") ????????fmt.Println("") ????????fmt.Println("??地址:"?+?a) ????????fmt.Println("??標題:"?+?text) ????????/*text?=?strings.ReplaceAll(text,?":",?"?") ????????text?=?strings.ReplaceAll(text,?"/",?"?") ????????text?=?strings.ReplaceAll(text,?"\\",?"?") ????????text?=?strings.ReplaceAll(text,?"?",?"?") ????????text?=?strings.ReplaceAll(text,?"*",?"?")?*/ ????????reg?:=?regexp.MustCompile(`:|\?|/|\*|<|>|"`) ????????tilte?:=?reg.ReplaceAllString(text,?"?") ????????docm,?err?:=?goquery.NewDocument(a) ????????if?err?!=?nil?{ ????????????log.Fatal(err) ????????} ????????sstext?:=?"" ????????docm.Find(".article-entry").Each(func(ii?int,?ss?*goquery.Selection)?{ ????????????sstext?=?ss.Text() ????????}) ????????//fmt.Println("??正文:"?+?sstext) ????????file,?_?:=?os.OpenFile("./爬蟲/第"+strconv.Itoa(ins)+"頁??"+strconv.Itoa(i+1)+"篇??"+tilte+"頁爬蟲.txt",?os.O_RDWR|os.O_TRUNC|os.O_CREATE,?0666) ????????defer?file.Close() 
????????file.Write([]byte(text?+?"\n正文:\n"?+?sstext?+?"\n\n\n")) ????????fmt.Println("??----------------------------------------------------------------------------?") ????}) ????ch?<-?ins } func?Doing(s,?e?int)?{ ????ch?:=?make(chan?int) ????for?i?:=?s;?i?<=?e;?i++?{ ????????go?getdata(i,?ch) ????} ????for?i?:=?s;?i?<=?e;?i++?{ ????????n?:=?<-ch ????????fmt.Printf("第%d頁爬取完畢\n",?n) ????} } func?main()?{ ????var?start,?end?int ????fmt.Println("輸入起始頁") ????fmt.Scan(&start) ????fmt.Println("輸入終止頁") ????fmt.Scan(&end) ????Doing(start,?end) }
// Command main downloads a range of Baidu Tieba listing pages one after
// another and saves each page as an HTML file in the current directory.
package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
	"time"
)

// httpClient carries a timeout so a stalled server cannot hang the crawl.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// pageURL returns the listing URL for the given 1-based page number.
// The pn query parameter advances in steps of 50 per page.
func pageURL(page int) string {
	return "http://tieba.baidu.com/f?kw=%E5%88%AB%E5%85%8B&ie=utf-8&pn=" + strconv.Itoa((page-1)*50)
}

// readBody reads r to the end and returns its content as a string.
// Any read error other than io.EOF is returned and the partial data is
// discarded, so a truncated download is never mistaken for a complete one.
func readBody(r io.Reader) (string, error) {
	data, err := io.ReadAll(r)
	if err != nil {
		return "", err
	}
	return string(data), nil
}

// HttpGet fetches url and returns the response body as a string.
// It returns an error when the request fails, when the status is not 2xx,
// or when reading the body fails.
func HttpGet(url string) (res string, err error) {
	fmt.Println(url)
	resp, err := httpClient.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}
	res, err = readBody(resp.Body)
	if err != nil {
		return "", fmt.Errorf("GET %s: %w", url, err)
	}
	fmt.Println("讀取完畢")
	return res, nil
}

// working fetches pages start through end (inclusive) sequentially, prints
// each page and writes it to "第N頁面.html". A page that cannot be fetched or
// written is reported and skipped.
func working(start, end int) {
	fmt.Printf("正在爬取%d頁面到%d頁\n", start, end)
	for i := start; i <= end; i++ {
		result, err := HttpGet(pageURL(i))
		if err != nil {
			fmt.Println(err)
			continue
		}
		fmt.Println(result)
		if err := os.WriteFile("第"+strconv.Itoa(i)+"頁面.html", []byte(result), 0666); err != nil {
			fmt.Println(err)
		}
	}
}

// main reads the first and last page numbers from standard input, validates
// them against the constraints stated in the prompts, and starts the crawl.
func main() {
	var start, end int
	fmt.Println("請輸入爬取的起始頁(》=1):")
	if _, err := fmt.Scan(&start); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("請輸入爬取的結束頁(》=start):")
	if _, err := fmt.Scan(&end); err != nil {
		fmt.Println(err)
		return
	}
	if start < 1 || end < start {
		fmt.Printf("invalid page range %d-%d\n", start, end)
		return
	}
	working(start, end)
}
// Command main downloads a range of Baidu Tieba listing pages concurrently,
// one goroutine per page, and saves each page as an HTML file in the current
// directory.
package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
	"time"
)

// httpClient is shared by all goroutines; the timeout keeps a stalled server
// from blocking a worker forever.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// pageURL returns the listing URL for the given 1-based page number.
// The pn query parameter advances in steps of 50 per page.
func pageURL(page int) string {
	return "http://tieba.baidu.com/f?kw=%E5%88%AB%E5%85%8B&ie=utf-8&pn=" + strconv.Itoa((page-1)*50)
}

// readBody reads r to the end and returns its content as a string.
// Any read error other than io.EOF is returned and the partial data is
// discarded, so a truncated download is never mistaken for a complete one.
func readBody(r io.Reader) (string, error) {
	data, err := io.ReadAll(r)
	if err != nil {
		return "", err
	}
	return string(data), nil
}

// HttpGet fetches url and returns the response body as a string.
// It returns an error when the request fails, when the status is not 2xx,
// or when reading the body fails.
func HttpGet(url string) (res string, err error) {
	fmt.Println(url)
	resp, err := httpClient.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}
	res, err = readBody(resp.Body)
	if err != nil {
		return "", fmt.Errorf("GET %s: %w", url, err)
	}
	return res, nil
}

// getdata fetches page i, writes it to "第i頁面.html", and then sends i on ch.
// The send happens on every path, including failures, so the receiver in
// working can never block forever. A failed fetch no longer leaves an empty
// file behind.
func getdata(i int, ch chan int) {
	defer func() { ch <- i }()

	name := "第" + strconv.Itoa(i) + "頁面.html"
	fmt.Println(name)
	result, err := HttpGet(pageURL(i))
	if err != nil {
		fmt.Println(err)
		return
	}
	if err := os.WriteFile(name, []byte(result), 0666); err != nil {
		fmt.Println(err)
	}
}

// working starts one goroutine per page from s through e (inclusive) and
// prints a line as each page reports completion.
func working(s, e int) {
	ch := make(chan int)
	for i := s; i <= e; i++ {
		go getdata(i, ch)
	}
	for i := s; i <= e; i++ {
		n := <-ch
		fmt.Printf("第%d頁爬取完畢\n", n)
	}
}

// main reads the first and last page numbers from standard input, validates
// them against the constraints stated in the prompts, and starts the crawl.
func main() {
	var start, end int
	fmt.Println("請輸入爬取的起始頁(》=1):")
	if _, err := fmt.Scan(&start); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("請輸入爬取的結束頁(》=start):")
	if _, err := fmt.Scan(&end); err != nil {
		fmt.Println(err)
		return
	}
	if start < 1 || end < start {
		fmt.Printf("invalid page range %d-%d\n", start, end)
		return
	}
	working(start, end)
}
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯繫站長郵箱:is@yisu.com進行舉報,並提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。