jsoup 抓取 iteye 网站

jsoup 抓取 iteye 网站:jsoup 效果见 http://knight-black-bob.iteye.com/blog/ 。相比 tika 和正则,我更喜欢 jsoup,因为正则比较难写。下面有 jar 包下载以及 Maven dependency 配置。

大家好,欢迎来到IT知识分享网。

jsoup 效果

jsoup 抓取 iteye 网站

http://knight-black-bob.iteye.com/blog/

与 tika 和正则相比,我更喜欢 jsoup。

正则比较难写。

下面有 jar 包下载,也可以通过 Maven 依赖引入:

<dependency>

<groupId>org.jsoup</groupId>

<artifactId>jsoup</artifactId>

<version>1.9.2</version>

</dependency>

IteyeItemEntity [ userName=wosyingjun, userPicLink=http://www.iteye.com/upload/logo/user//fa9a8493-f9a7-3e3b-9630-12ad8f65d277-thumb.png?, userBlogLink=http://wosyingjun.iteye.com, title=推荐几个自己写的Java后端相关的范例项目, content=推荐几个自己写的范例项目 这里推荐几个自己写的范例项目,主要采用SSM(Spring+SpringMVC+Mybatis)框架,分布式架构采用的是(dubbo+zookeeper)。范例项目的好处是简单易懂,在架构一个新的项目的时候可以直接当成脚手架来用,方便快速开发,另外项目中涉及到以及未来可能涉及到的知识点都会不断完善。 三个项目是互相发展而来的,目前仍在不断完善中,依次为: ..., articleLink=http://wosyingjun.iteye.com/blog/, seeNum=有2871人浏览, goodNum=7顶, badNum=0踩, insertTime=2016-07-21 09:04 ] //Connection connection = Jsoup.connect(url); //Document document = connection.get; Document document = Jsoup.parse(data); Elements indexmain = document.select(".blog"); Iterator<Element> blogIter = indexmain.iterator; IteyeItemEntity item = null; while (blogIter.hasNext) { Element element = blogIter.next; String userName = element.select(".content .blog_info a[title]").text; String userPicLink = element.select(".content .logo img").attr("src"); String userBlogLink = element.select(".content .blog_info a").attr("href"); String title = element.select(".content h3 a[title]").text; String content = element.select(".content div").iterator.next.text; String articleLink = element.select(".content h3 a").last.attr("href"); String seeNum = element.select(".content .blog_info .view").text; String goodNum = element.select(".content .blog_info .digged .digg").text; String badNum = element.select(".content .blog_info .digged .bury").text; String insertTime = element.select(".content .blog_info .date").text; item = new IteyeItemEntity(userName, userPicLink, userBlogLink, title, content, articleLink, seeNum, goodNum, badNum, insertTime); list.add(item); 
package com.couriousby.iteyedemo.util;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import android.util.Log;
import android.widget.Toast;

import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.entity.IteyeItemEntity;

/**
 * Turns the HTML of an iteye blog-list page into {@link IteyeItemEntity} objects
 * by querying it with jsoup CSS selectors.
 *
 * @author baoyou E-mail:
 * @version 2016-7-22 10:58:49 AM
 *
 * desc: ...
 */
public class IteyeJsoupPerformer {

    private static final String TAG = "IteyeJsoupPerformer";

    /**
     * Parses blog-list HTML and returns one entity per element matching ".blog".
     *
     * <p>Fields whose selector matches nothing are filled with an empty string, so a
     * single incomplete entry no longer aborts parsing of the entries that follow it.
     *
     * @param data HTML text of the list page; {@code null} yields an empty list
     * @return the parsed entries in document order; never {@code null}. If parsing
     *         fails part-way, the entries collected so far are returned.
     */
    public static List<IteyeItemEntity> getListIteyeEntity(String data) {
        List<IteyeItemEntity> list = new ArrayList<IteyeItemEntity>();
        if (data == null) {
            return list;
        }
        try {
            // Alternative that fetches the page directly instead of parsing a string:
            // Connection connection = Jsoup.connect(url);
            // Document document = connection.get();
            Document document = Jsoup.parse(data);
            Elements blogs = document.select(".blog");
            for (Element element : blogs) {
                String userName = element.select(".content .blog_info a[title]").text();
                String userPicLink = element.select(".content .logo img").attr("src");
                String userBlogLink = element.select(".content .blog_info a").attr("href");
                String title = element.select(".content h3 a[title]").text();
                // Only the first <div> under ".content" is used as the summary text.
                String content = firstText(element.select(".content div"));
                // The last <a> inside the <h3> supplies the article link.
                String articleLink = lastAttr(element.select(".content h3 a"), "href");
                String seeNum = element.select(".content .blog_info .view").text();
                String goodNum = element.select(".content .blog_info .digged .digg").text();
                String badNum = element.select(".content .blog_info .digged .bury").text();
                String insertTime = element.select(".content .blog_info .date").text();

                list.add(new IteyeItemEntity(userName, userPicLink, userBlogLink, title,
                        content, articleLink, seeNum, goodNum, badNum, insertTime));
            }
        } catch (Exception e) {
            // Best effort: report the failure and hand back whatever was parsed so far.
            Log.e(TAG, "Failed to parse iteye blog list", e);
        }
        return list;
    }

    /** Returns the text of the first matched element, or "" when nothing matched. */
    private static String firstText(Elements elements) {
        Element first = elements.first();
        return first == null ? "" : first.text();
    }

    /** Returns the given attribute of the last matched element, or "" when nothing matched. */
    private static String lastAttr(Elements elements, String attributeName) {
        Element last = elements.last();
        return last == null ? "" : last.attr(attributeName);
    }
}
package com.couriousby.iteyedemo.util;

import java.util.ArrayList;
import java.util.List;

import com.couriousby.iteyedemo.R;
import com.couriousby.iteyedemo.entity.GridEntity;

/**
 * URL building and category data for the iteye blog list.
 *
 * @author baoyou E-mail:
 * @version 2016-7-22 1:10:55 PM
 *
 * desc: ...
 */
public class Constants {

    /** Base address of the blog list; category path and page query are appended to it. */
    final static String URL_BASE = "http://www.iteye.com/blogs";

    /**
     * Maps a category id to the URL path segment for that category.
     *
     * @param category category id, 0 to 11
     * @return the path segment (for example "/category/mobile"); the empty string for
     *         id 0 and for any id outside 0 to 11
     */
    public static String getIteyeCategory(int category) {
        switch (category) {
            case 0:
                return "";
            case 1:
                return "/category/mobile";
            case 2:
                return "/category/web";
            case 3:
                return "/category/architecture";
            case 4:
                return "/category/language";
            case 5:
                return "/category/internet";
            case 6:
                return "/category/opensource";
            case 7:
                return "/category/os";
            case 8:
                return "/category/database";
            case 9:
                return "/category/develop";
            case 10:
                return "/category/industry";
            case 11:
                return "/category/other";
            default:
                return "";
        }
    }

    /**
     * Builds the list-page URL for a category and page number.
     *
     * @param category category id, resolved through {@link #getIteyeCategory(int)}
     * @param page     page number, appended as the "page" query parameter
     * @return URL_BASE + category path + "?page=" + page
     */
    public static String getIteyeUrl(int category, int page) {
        return URL_BASE + getIteyeCategory(category) + "?page=" + page;
    }

    /**
     * Creates the entries for the category chooser grid.
     *
     * <p>The first argument of each entry matches the ids accepted by
     * {@link #getIteyeCategory(int)}. NOTE(review): the meaning of the fourth
     * GridEntity argument is not visible here; confirm against GridEntity.
     *
     * @return a new mutable list with the twelve category entries, in id order
     */
    public static List<GridEntity> getGridItem() {
        List<GridEntity> list = new ArrayList<GridEntity>();
        list.add(new GridEntity(0, "全部分类", R.drawable.iteye_all, 0));
        list.add(new GridEntity(1, "移动开发", R.drawable.iteye_mobile, 1));
        list.add(new GridEntity(2, "web前端", R.drawable.iteye_web, 2));
        list.add(new GridEntity(3, "企业架构", R.drawable.iteye_architecture, 3));
        list.add(new GridEntity(4, "编程语言", R.drawable.iteye_language, 4));
        list.add(new GridEntity(5, "互联网", R.drawable.iteye_internet, 5));
        list.add(new GridEntity(6, "开源软件", R.drawable.iteye_opensource, 6));
        list.add(new GridEntity(7, "操作系统", R.drawable.iteye_os, 7));
        list.add(new GridEntity(8, "数据库", R.drawable.iteye_database, 8));
        list.add(new GridEntity(9, "研发管理", R.drawable.iteye_develop, 9));
        list.add(new GridEntity(10, "行业应用", R.drawable.iteye_industry, 10));
        list.add(new GridEntity(11, "非技术", R.drawable.iteye_other, 11));
        return list;
    }
}
package com.couriousby.iteyedemo.activity;

import java.util.ArrayList;
import java.util.List;

import android.app.Activity;
import android.content.Context;
import android.content.Intent;
import android.graphics.Color;
import android.graphics.drawable.BitmapDrawable;
import android.graphics.drawable.ColorDrawable;
import android.os.Bundle;
import android.view.View;
import android.view.View.OnClickListener;
import android.widget.AdapterView;
import android.widget.AdapterView.OnItemClickListener;
import android.widget.AdapterView.OnItemSelectedListener;
import android.widget.LinearLayout.LayoutParams;
import android.widget.GridView;
import android.widget.PopupWindow;
import android.widget.TextView;

import com.couriousby.iteyedemo.MyApplication;
import com.couriousby.iteyedemo.R;
import com.couriousby.iteyedemo.adapter.IteyeAdapter;
import com.couriousby.iteyedemo.adapter.IteyePopwindowGridListAdapter;
import com.couriousby.iteyedemo.entity.GridEntity;
import com.couriousby.iteyedemo.entity.IteyeItemEntity;
import com.couriousby.iteyedemo.listener.OnIteyeGridViewItemclickListener;
import com.couriousby.iteyedemo.quote.xlistview.MsgListView;
import com.couriousby.iteyedemo.quote.xlistview.MsgListView.IXListViewListener;
import com.couriousby.iteyedemo.request.event.IteyeStringHttpEvent;
import com.couriousby.iteyedemo.request.event.base.RequestEvent;
import com.couriousby.iteyedemo.request.http.IteyeHttpRequest;
import com.couriousby.iteyedemo.util.Constants;
import com.couriousby.iteyedemo.util.IteyeJsoupPerformer;

import de.greenrobot.event.EventBus;

/**
 * List screen for iteye blog entries.
 *
 * <p>Shows the entries in a pull-to-refresh / load-more list, lets the user pick a
 * category from a popup grid, and opens {@link IteyeDetailActivity} when an entry is
 * tapped. Page data is requested through {@link IteyeHttpRequest} and delivered back
 * as {@link RequestEvent}s via EventBus.
 */
public class IteyeMainActivity extends Activity implements IXListViewListener,
        OnItemClickListener, OnClickListener, OnIteyeGridViewItemclickListener {

    /** Bundle key under which the article link is passed to the detail screen. */
    final static String ITEYE_DETAIL_URL = "iteye_detail_url";

    /** Page number of the most recent request; reset to 1 on create, refresh and category change. */
    private static int start = 1;
    /** Currently selected category id, as understood by {@link Constants#getIteyeCategory(int)}. */
    private static int category = 0;

    private Context mContext;
    private MsgListView mListView;
    private List<IteyeItemEntity> mDataList;
    private IteyeAdapter mAdapter;
    private TextView mTopChooseBar;
    private PopupWindow mPopupWindow;
    private IteyePopwindowGridListAdapter gridAdapter;
    private GridView gridView;
    private List<GridEntity> mGridList;

    /** Builds the screen, subscribes to EventBus and requests the first page of category 0. */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.iteye_list);
        this.mContext = IteyeMainActivity.this;

        initUtils();
        initView();
        initListeners();

        EventBus.getDefault().register(this);

        start = 1;
        category = 0;
        IteyeHttpRequest.getIteyeDate(category, start);
    }

    /** Creates the list adapter and the category-grid adapter together with their backing data. */
    private void initUtils() {
        mDataList = new ArrayList<IteyeItemEntity>();
        mAdapter = new IteyeAdapter(mContext);
        mAdapter.setmDataList(mDataList);

        mGridList = Constants.getGridItem();
        gridAdapter = new IteyePopwindowGridListAdapter(mContext);
        gridAdapter.setOnIteyeGridViewItemclickListener(this);
        gridAdapter.setmList(mGridList);
    }

    /** Looks up the views and builds the popup window that hosts the category grid. */
    private void initView() {
        mListView = (MsgListView) this.findViewById(R.id._news_list);
        mListView.setAdapter(mAdapter);

        // The popup content is inflated separately; the grid lives inside it, while the
        // chooser bar is looked up in this activity's own layout.
        View baseView = View.inflate(this, R.layout.iteye_topbar, null);
        mTopChooseBar = (TextView) this.findViewById(R.id.tv_iteye_topbar);

        mPopupWindow = new PopupWindow(baseView, LayoutParams.MATCH_PARENT,
                LayoutParams.WRAP_CONTENT, false);
        mPopupWindow.setBackgroundDrawable(new BitmapDrawable());
        mPopupWindow.setOutsideTouchable(true);
        mPopupWindow.setFocusable(true);

        gridView = (GridView) baseView.findViewById(R.id.iteye_gr_mlist);
        gridView.setAdapter(gridAdapter);
    }

    /** Wires click, refresh and load-more callbacks to this activity. */
    private void initListeners() {
        mTopChooseBar.setOnClickListener(this);

        mListView.setPullLoadEnable(true);
        mListView.setPullRefreshEnable(true);
        mListView.setXListViewListener(this);
        mListView.setAdapter(mAdapter);
        mListView.setOnItemClickListener(this);

        gridView.setSelector(new ColorDrawable(Color.TRANSPARENT));
    }

    /** Unsubscribes from EventBus before the activity is torn down. */
    @Override
    public void onDestroy() {
        EventBus.getDefault().unregister(this);
        super.onDestroy();
    }

    /**
     * Handles request events posted through EventBus. Only
     * {@link IteyeStringHttpEvent}s are processed; other events are ignored.
     *
     * <p>NOTE(review): presumably invoked on the main thread, following the
     * EventBus {@code onEventMainThread} naming convention; confirm against the
     * EventBus version in use.
     *
     * @param requestEvent the posted event
     */
    public void onEventMainThread(RequestEvent requestEvent) {
        if (requestEvent instanceof IteyeStringHttpEvent) {
            IteyeStringHttpEvent event = (IteyeStringHttpEvent) requestEvent;
            switch (event.status) {
                case HTTP_ERROR:
                    mListView.stopRefresh();
                    mListView.stopLoadMore();
                    mListView.setPullLoadEnable(false);
                    break;
                case HTTP_START: {
                    mListView.stopRefresh();
                    mListView.stopLoadMore();
                    String result = event.data;
                    mAdapter.clearMDataList();
                    List<IteyeItemEntity> list = IteyeJsoupPerformer.getListIteyeEntity(result);
                    mAdapter.setmDataList(list);
                    mAdapter.notifyDataSetChanged();
                }
                    break;
                case HTTP_SUCCESS: {
                    mListView.stopRefresh();
                    mListView.stopLoadMore();
                    String result = event.data;
                    // NOTE(review): the list is cleared before the new page is added, so a
                    // load-more replaces the visible entries rather than appending to
                    // them; confirm this is intended.
                    mAdapter.clearMDataList();
                    List<IteyeItemEntity> list = IteyeJsoupPerformer.getListIteyeEntity(result);
                    mAdapter.addMDataList(list);
                    mAdapter.notifyDataSetChanged();
                }
                    break;
                default:
                    break;
            }
        }
    }

    /** Opens the detail screen for the tapped entry, passing its article link in a bundle. */
    @Override
    public void onItemClick(AdapterView<?> parent, View v, int position, long id) {
        // NOTE(review): the -1 offset presumably skips a header row added by
        // MsgListView; confirm, and confirm getItem tolerates an index of -1.
        IteyeItemEntity item = mAdapter.getItem(position - 1);
        if (item != null) {
            Intent msgIntent = new Intent();
            Bundle bundle = new Bundle();
            // A Bundle is backed by a HashMap<String, Object>.
            bundle.putString(IteyeMainActivity.ITEYE_DETAIL_URL, item.getArticleLink());
            msgIntent.putExtra("bundle", bundle);
            msgIntent.setClass(MyApplication.newInstance(), IteyeDetailActivity.class);
            startActivityForResult(msgIntent, 1000);
        }
    }

    /** Pull-to-refresh: re-enables both pull gestures and reloads page 1 of the current category. */
    @Override
    public void onRefresh() {
        start = 1;
        mListView.setPullLoadEnable(true);
        mListView.setPullRefreshEnable(true);
        IteyeHttpRequest.getIteyeDate(category, start);
    }

    /** Load-more: requests the next page of the current category. */
    @Override
    public void onLoadMore() {
        start += 1;
        IteyeHttpRequest.getIteyeDate(category, start);
    }

    /** Toggles the category popup when the chooser bar is clicked. */
    @Override
    public void onClick(View view) {
        switch (view.getId()) {
            case R.id.tv_iteye_topbar:
                if (mPopupWindow.isShowing()) {
                    mPopupWindow.dismiss();
                } else {
                    mPopupWindow.showAsDropDown(view);
                }
                break;
            default:
                break;
        }
    }

    /** Applies the chosen category: updates the bar label, closes the popup and loads page 1. */
    @Override
    public void OnIteyeGridViewItemclick(GridEntity item) {
        mTopChooseBar.setText(item.getName());
        mPopupWindow.dismiss();
        start = 1;
        category = item.getId();
        IteyeHttpRequest.getIteyeDate(category, start);
    }
}

免责声明:本站所有文章内容,图片,视频等均是来源于用户投稿和互联网及文摘转载整编而成,不代表本站观点,不承担相关法律责任。其著作权各归其原作者或其出版社所有。如发现本站有涉嫌抄袭侵权/违法违规的内容,侵犯到您的权益,请在线联系站长,一经查实,本站将立刻删除。 本文来自网络,若有侵权,请联系删除,如若转载,请注明出处:https://haidsoft.com/176959.html

(0)
上一篇 2025-04-23 10:15
下一篇 2025-04-23 10:20

相关推荐

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注

关注微信