Monyer发布“百度空间文章列表提取工具”

作者:Monyer
来源:梦之光芒

这是一个Html静态页面的百度空间文章列表提取工具

代码接近300行

完全用纯JavaScript脚本编写

都是Monyer一行一行打上去的。

下面介绍一下使用方法:
把代码下到本地保存为*.html文件
(一定要把HTML保存到本地再打开,放在服务器上是无法运行的)


用IE(最好是IE7)打开html页面,输入自己空间地址,点击初始化。

空间文章的页数会被自动获取,同时获取的还有文章类别(可以点击复选框选择提取单个类别的文章列表),选择好开始页和结束页,点击提取文章列表即可。

当所选页面全部提取完成时,会弹出提示!如果其中有因网络问题而无法访问的url,它们会被列在“因网络问题,未获取的url列表”下面。提取结束后,保存功能会被开启,可以直接把列表保存为HTML。

虽然获取时是以div形式显示的,但保存时是以table形式保存的,因为这样可以直接导入Excel。

重新打开保存的html列表文件,点击右键选择“导入到microsoft excel”,excel会被自动开启,我们的文章列表会自动导入到excel中。

至此,我们就简单完成了文章列表的获取到转excel功能,是不是很方便呢?

代码的下载地址为:http://monyer.cn/demo/百度空间文章列表提取工具html源代码.txt (请用flashget等下载软件下载到本地运行!)

这里也贴一下代码:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<title>百度空间文章列表提取工具</title>
<style>
/* Page-wide default font size. */
*{
font-size:12px
}
/* Header row holding the blog-address input and the initialize button. */
#head{
text-align:center;
}
/* Page/category chooser: hidden at first; the script switches it to
   display:block once the category and page lists have been filled in. */
#choose{
display: none;
}
/* List of URLs that could not be fetched: hidden at first; the script
   shows it when a request finishes with a non-200 status. */
#error{
display:none;
}
/* Save button: hidden at first; the script shows it after the last page
   has been processed. */
#savelist{
display:none;
}
/* Column classes used by the spans of each generated article row. */
.count{
width:50px;float: left; display: block;
}
.url{
width:180px;float: left; display: block;
}
/* NOTE(review): the leading "+" on overflow looks like the old IE-only
   property hack; other browsers will ignore that declaration - confirm. */
.tit{
width:400px;float: left; display: block;+overflow:hidden;text-overflow:ellipsis;white-space:nowrap;
}
.date{
width:150px;float: left; display: block;
}
.clg{
width:150px;float: left; display: block;
}
</style>
</head>
<script language="javascript" type="text/javascript">
// ---- Shared state -------------------------------------------------------
// Every function in this script communicates through these globals.

// Single request object reused for every page fetch (IE ActiveX only).
var XmlHttp = new ActiveXObject("Microsoft.XMLHTTP");
// Running number of the articles listed so far; reset when extraction starts.
var count;
// Delay in milliseconds passed to setTimeout before a fetched page is processed.
var timer = 1500;
// Scratch variables shared by the parsing functions (loop index and
// substring boundaries).
var i;
var start;
var mid;
var end;
// text: body of the most recent successful response.
// text2 / texts: scratch string and array of split pieces used while parsing.
var text = "";
var text2 = "";
var texts;
// Blog user name taken from the #blogurl input.
var user = "";
// URL of the blog front page requested by Initialize.
var BlogUrl = "";
// NOTE(review): not referenced anywhere else in the visible script.
var BlogListUrl = "";
// Whether extraction is restricted to the selected category (#isArtclg checkbox).
var isArtclg;
// Index of the next list page to request.
var page = 0;
// Text cut out of the "last page" link; used as the highest page index.
var page_end;
// The #page_min / #page_max select elements.
var page_min;
var page_max;
// Values chosen in those selects when extraction starts.
var page_min_value;
var page_max_value;
// Category name chosen in the #artclg select.
var artclg;

// ---- Markers used to cut the fetched HTML apart ------------------------
// Naming convention: *_a / *_b delimit a region, *_i is the separator the
// region is split on, *_c / *_d delimit the value inside one piece.
var artclg_a = "<div id=\"m_artclg\" class=\"modbox\">";// article categories
var artclg_b = "<div id=\"mod_filed\" class=\"mod\">";
var artclg_i = "<div class=\"line\">";
var artclg_c = "title=\"查看该分类中所有文章\">";
var artclg_d = "</a>";
var page_a = "[下一页]";// page count
var page_b = "[尾页]";
var page_c = "/blog/index/";
var page_d = "\">";
var bloglist_a = "<div id=\"m_blog\" class=\"modbox\">";// article list
var bloglist_b = "function setpv(allnum)";
var bloglist_i = "<div class=\"line\">";
var bloglist_mid = "innerHTML = tps;";
// Title of the article currently being parsed, and its delimiters.
var tit;
var tit_a = "target=\"_blank\">";
var tit_b = "</a></div>";
// Item id of the current article (the part between url_a and url_b).
var url;
var url_a = "/blog/item/";
var url_b = ".html";
// Date text of the current article, and its delimiters.
var date ;
var date_a = "<div class=\"date\">";
var date_b = "</div>";
// Category text of the current article, and its delimiters.
var clg;
var clg_a = ">类别:";
var clg_b = "</a>";

// NOTE(review): not referenced anywhere else in the visible script.
var bloglist_list;

// Shorthand lookup: returns whatever document.getElementById yields for `id`.
function $(id){
    var element = document.getElementById(id);
    return element;
}

// Wires up the three buttons once the page has loaded.
window.onload = function Listening(){
    // Click handler of the "extract" button: snapshots the chosen page
    // range and category settings into the shared globals, resets the
    // article counter and starts the fetch loop.
    function beginExtraction(){
        page_min_value = $("page_min").value;
        page_max_value = $("page_max").value;
        page = page_min_value;
        isArtclg = $("isArtclg").checked;
        artclg = $("artclg").value;
        count = 0;
        getList();
    }

    $("initialize").onclick = Initialize;
    $("getList").onclick = beginExtraction;
    $("savelist").onclick = saveList;
};

// Fetches `url` asynchronously with the shared XmlHttp object.
//
// url - address to request with GET.
// fun - continuation handed to setTimeout after a successful fetch. The
//       callers in this file pass a string of code; a function works too.
//       NOTE: a string argument is evaluated like eval, so only pass
//       trusted, hard-coded strings.
//
// On status 200 the response body is stored in the global `text` and `fun`
// is scheduled after `timer` milliseconds. On any other status the URL is
// appended to the #error element, which is made visible, and `fun` is NOT
// run, so the fetch chain stops there.
function getHtml(url,fun){
    XmlHttp.open("get",url,true);
    // Attach the handler after open() and before send(): a handler set
    // after send() can miss a request that completes immediately.
    XmlHttp.onreadystatechange = function(){
        if(XmlHttp.readyState !== 4){
            return;
        }
        if(XmlHttp.status === 200){
            text = XmlHttp.responseText;
            setTimeout(fun,timer);
        }else{
            var errorBox = $("error");
            errorBox.style.cssText = "display:block;text-align:center;";
            errorBox.insertAdjacentHTML("beforeEnd",url+"<br />");
        }
    };
    XmlHttp.send(null);
}

// Click handler of the "initialize" button: reads the blog name from the
// #blogurl input, builds the blog front-page URL and fetches it; getArtclg
// runs once the page has arrived.
function Initialize(){
    var typedName = $("blogurl").value;
    if(user == typedName){
        // Same name as the current `user`: a slash is appended instead.
        // NOTE(review): presumably this makes the URL differ from the
        // previous request so the page is fetched again - confirm.
        user += "/";
    }else{
        user = typedName;
    }
    BlogUrl = "http://hi.baidu.com/" + user + "/blog";
    getHtml(BlogUrl,"getArtclg()");
}

// Fills the category select and the two page selects from the blog front
// page held in the global `text`, then reveals the #choose panel.
//
// Reads:  text and the artclg_* / page_* marker strings.
// Writes: page_min, page_max (the select elements) and page_end (the text
//         cut out of the "last page" link).
//
// All three option lists are emptied first, so running this again for a
// blog with fewer categories or pages leaves no stale entries behind.
function getArtclg(){
    var categorySelect = $("artclg");
    var pieces;
    var from;
    var to;
    var label;
    var pager;
    var lastPage;
    var n;

    page_min = $("page_min");
    page_max = $("page_max");

    categorySelect.options.length = 0;
    page_min.options.length = 0;
    page_max.options.length = 0;

    // Category names: take the region between artclg_a and artclg_b and
    // split it on artclg_i. The piece after the last separator is skipped.
    from = text.indexOf(artclg_a);
    to = text.indexOf(artclg_b);
    pieces = text.substring(from,to).split(artclg_i);
    for(n=0;n<pieces.length-1;n++){
        from = pieces[n].indexOf(artclg_c) + artclg_c.length;
        to = pieces[n].indexOf(artclg_d);
        label = pieces[n].substring(from,to);
        categorySelect.options[n] = new Option(label, label);
    }

    // Highest page index: the text between page_c and page_d inside the
    // region that runs from the page_a marker to the page_b marker.
    from = text.indexOf(page_a);
    to = text.indexOf(page_b);
    pager = text.substring(from,to);
    from = pager.indexOf(page_c) + page_c.length;
    to = pager.indexOf(page_d);
    page_end = pager.substring(from,to);

    // When no page number could be extracted, fall back to a single page
    // (index 0).
    lastPage = parseInt(page_end,10);
    if(isNaN(lastPage)){
        lastPage = 0;
    }
    for(n=0;n<=lastPage;n++){
        page_min.options[n] = new Option("第"+(n+1)+"页",n);
        page_max.options[n] = new Option("第"+(n+1)+"页",n);
    }

    $("choose").style.cssText = "display:block;text-align:center;";
}

// Requests the list page with index `page` (restricted to the chosen
// category when isArtclg is set) and advances `page` by one. After a page
// arrives, doList() runs and then getList() again, so the pages are walked
// one at a time. Once `page` has passed page_max_value, a completion alert
// is shown and the save button is revealed.
function getList(){
    var listUrl = "http://hi.baidu.com/" + user + "/blog/index/" + page;
    if(isArtclg){
        listUrl = "http://hi.baidu.com/" + user + "/blog/category/" + UrlEncode(artclg) + "/index/" + page;
    }
    // Compare as numbers: on the first call both values are strings taken
    // from <select>.value, and a string comparison would rank "2" above
    // "10" and end the extraction immediately.
    if(Number(page) <= Number(page_max_value)){
        getHtml(listUrl,"doList();getList();");
    }else{
        alert("获取完成");
        $("savelist").style.cssText = "display:block;";
    }
    page++;
}

// Parses the article list out of the page HTML held in the global `text`
// and adds one row per article to the #list element.
//
// Works entirely through the shared globals (start, mid, end, text2, texts,
// i, url, tit, date, clg, count); it takes no arguments and returns nothing.
// The order of the statements matters: texts[i] is overwritten twice with a
// narrower slice of itself while its fields are being extracted.
function doList(){
// Keep only the region between the bloglist_a and bloglist_b markers and
// split it on bloglist_i.
start = text.indexOf(bloglist_a);
end = text.indexOf(bloglist_b);
text2 = text.substring(start,end);
texts = text2.split(bloglist_i);

// The piece after the last separator is never processed.
for(i=0;i<texts.length-1;i++){
// Item id: the text between url_a and url_b.
start = texts[i].indexOf(url_a) + url_a.length;
end = texts[i].indexOf(url_b);
url = texts[i].substring(start,end);

// Title: the text between tit_a and tit_b.
start = texts[i].indexOf(tit_a) + tit_a.length;
end = texts[i].indexOf(tit_b);
tit = texts[i].substring(start,end);

// Narrow the piece to the span from date_a through the end of bloglist_mid,
// so the following searches only see that part.
start = texts[i].indexOf(date_a);
mid = texts[i].indexOf(bloglist_mid) + bloglist_mid.length;
texts[i] = texts[i].substring(start,mid);

// Date: the text between date_a and the first date_b in the narrowed piece.
start = texts[i].indexOf(date_a) + date_a.length;
end = texts[i].indexOf(date_b);
date = texts[i].substring(start,end);

// Narrow again: from clg_a up to (not including) bloglist_mid.
start = texts[i].indexOf(clg_a);
mid = texts[i].indexOf(bloglist_mid);
texts[i] = texts[i].substring(start,mid);

// Category: the text between clg_a and clg_b.
start = texts[i].indexOf(clg_a) + clg_a.length;
end = texts[i].indexOf(clg_b);
clg = texts[i].substring(start,end);

// Build the row markup; the extracted strings are inserted as HTML, unescaped.
count++;
text2 = "<br><div>";
text2 += "<span class='count'>" + count + "</span>";
text2 += "<span class='url'>" + url + "</span>";
text2 += "<span class='tit'><a href='http://hi.baidu.com/"+user+"/blog/item/"+url+".html' target='blank'>" + tit + "</a></span>";
text2 += "<span class='date'>" + date + "</span>";
text2 += "<span class='clg'>" + clg + "</span>";
text2 += "</div>";
// "afterBegin" puts each new row at the top of #list, so rows added later
// appear above rows added earlier.
$("list").insertAdjacentHTML("afterBegin",text2);
}
}

// Percent-encodes `str` for use in a category URL, using the character
// codes reported by str2asc (a helper defined outside this function that
// returns a character's code as a hexadecimal string).
//
// str     - text to encode.
// returns - the encoded string:
//             * a code above 0x7F is written as "%XX%YY" from its first and
//               last two hex digits, or as a single "%XX" when the code has
//               at most two hex digits;
//             * a space becomes "+";
//             * a character listed in strSpecial becomes "%XX";
//             * anything else is copied unchanged.
function UrlEncode(str){
    var ret = "";
    var strSpecial = "!\"#$%&'()*+,/:;<=>?[]^`{|}~%";
    for(var i=0;i<str.length;i++){
        var chr = str.charAt(i);
        var c = str2asc(chr);
        if(parseInt(c,16) > 0x7f){
            if(c.length > 2){
                ret += "%" + c.slice(0,2) + "%" + c.slice(-2);
            }else{
                // One-byte code: emit it once rather than duplicating it.
                ret += "%" + c;
            }
        }else if(chr == " "){
            ret += "+";
        }else if(strSpecial.indexOf(chr) != -1){
            ret += "%" + c;
        }else{
            ret += chr;
        }
    }
    return ret;
}

// Save-related code.
//
// Converts the markup of the #list element into table markup for saving:
// <br> tags are removed, every <div> becomes a <tr> and every <span> a <td>,
// and the result is wrapped in <table>...</table>.
//
// Only tag names are rewritten, in either letter case, so the words "DIV"
// and "SPAN" inside article titles stay untouched. The global `text` is
// left alone.
//
// returns - the table markup as a string.
function formatList(){
    var html = $("list").innerHTML;
    html = html.replace(/<BR\s*\/?>/gi,"");
    html = html.replace(/<(\/?)DIV\b/gi,"<$1tr");
    html = html.replace(/<(\/?)SPAN\b/gi,"<$1td");
    return "<table>" + html + "</table>";
}
// Click handler of the save button: writes the table produced by
// formatList() into a new blank window, invokes the 'saveas' command on
// that window's document with the suggested name Monyer.html, then closes
// the window.
function saveList() {
    var popup;
    popup = window.open('', '_blank', '');
    // The document is looked up through the window on every step rather
    // than cached in a variable.
    popup.document.open('text/html', 'replace');
    popup.document.writeln(formatList());
    popup.document.close();
    popup.document.execCommand('saveas','','Monyer.html');
    popup.close();
}
</script>
<body>
<script language="vbscript" type="text/vbscript">
' Returns the hexadecimal string form (Hex) of the character code that Asc
' reports for strstr. Called from the JavaScript UrlEncode function.
' NOTE(review): the result presumably depends on the system's ANSI code
' page (the page declares gb2312) - confirm.
Function str2asc(strstr)
str2asc = hex(asc(strstr))
End Function
</script>
<div id="head">
http://hi.baidu.com/<input name="blogurl" type="text" id="blogurl" value="monyer" />
<input name="initialize" type="button" id="initialize" value="初始化" />
</div>
<div id="choose">
开始页
<select name="page_min" id="page_min">
</select>
结束页
<select name="page_max" id="page_max">
</select><br />

按分类提取?
<input name="isArtclg" type="checkbox" id="isArtclg" value="checkbox" />
<select name="artclg" id="artclg">
</select><br />
<input name="getList" type="button" id="getList" value="提取文章列表" />
<input name="savelist" type="button" id="savelist" value="保存文章列表" />
</div>
<div id="list">
</div>
<div id="error">
因网络问题,未获取的url列表:<br />
</div>
</body>
</html>

没有任何版权信息,大家可以放心使用!

本文欢迎转载,但把别人辛辛苦苦编出来的东西硬说成自己的便是不道德,没人品的行为!

Monyer!

相关日志

楼被抢了 2 层了... 抢座Rss 2.0或者 Trackback

  • woyigui

    这个不错,上一个html版的我用的不行,不管本地还是服务器都不能提取出来,你测试了没?

  • 高手

    厉害啊.百度空间都可以提取..

发表评论