Reputation: 615
I wrote the following code:
/**
 * Fetches a hard-coded URL, tries to determine the page's character
 * encoding, and prints the response body line by line.
 *
 * The encoding is first taken from the Content-Type response header
 * (falling back to "UTF-8") and is then overwritten by the first body line
 * that mentions "charset". After that the method builds a second reader on
 * the connection's input stream and prints whatever that reader returns.
 *
 * Every exception raised inside the method body is caught and its stack
 * trace is printed.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) throws HttpException, IOException,
JSONException {
try {
URL murl = new URL(
"http://www.baidu.com/link?url=NaethV_J2hSPVx_OdPlHk73964mU4LcwWkJmVUV4vIkuCXRf1y09ufRZVwkHJqSAa2mMSCoTLYVhGv2AyV_04_");
HttpURLConnection conn = (HttpURLConnection) murl.openConnection();
// Send a desktop-browser User-Agent string with the request.
conn.setRequestProperty(
"User-Agent",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36");
// Connect timeout in milliseconds (10 seconds).
conn.setConnectTimeout(10000);
conn.connect();
String strencoding = null;
// Scan the response headers for a charset declared in Content-Type.
Map<String, List<String>> map = conn.getHeaderFields();
Set<String> keys = map.keySet();
Iterator<String> iterator = keys.iterator();
String key = null;
String tmp = null;
while (iterator.hasNext()) {
key = iterator.next();
// The header value is a List, so toString() yields "[value]"; the
// trailing "]" is stripped again below.
tmp = map.get(key).toString().toLowerCase();
// The header name is compared case-sensitively.
if (key != null && key.equals("Content-Type")) {
System.out.println(tmp);
int m = tmp.indexOf("charset=");
if (m != -1) {
// 8 is the length of "charset=".
strencoding = tmp.substring(m + 8).replace("]", "");
}
}
}
// Fall back to UTF-8 when the header declared no charset.
strencoding = strencoding == null ? "UTF-8" : strencoding;
// The status code itself is discarded.
conn.getResponseCode();
// conn.connect();
// Print the connection's URL, then reduce it to the part before the
// first "/". The reduced value is only referenced by the commented-out
// code below.
String href = conn.getURL().toString();
System.out.println(href);
href = href.replace("http://", "");
try {
href = href.split("/")[0];
} catch (Exception eee) {
// Any failure here is silently ignored.
}
/*
* ParseDomainName pdn = new ParseDomainName(href);
* System.out.println("Your host IP is: " +
* pdn.getMyIP().getHostAddress());
* System.out.println("The Server IP is :" +
* pdn.getServerIP().getHostAddress()); // InputStream inputstream =
* conn.getInputStream();
*/
// First pass: read the body with the header-derived encoding and look
// for a "charset" declaration inside the page itself.
BufferedReader reader = new BufferedReader(new InputStreamReader(
conn.getInputStream(), strencoding));
String lines;
// Line counter; it keeps counting across both read loops.
int i = 1;
while ((lines = reader.readLine()) != null) {
// Case-insensitive test; "> 0" means a match at column 0 is skipped.
if (lines.toLowerCase().indexOf("charset") > 0) {
System.out.println(lines);
String strtmp = lines;
// This second search is case-sensitive, unlike the test above.
int inttmp = strtmp.indexOf("charset");
if (inttmp > -1) {
System.out.println(strtmp.length());
// Take everything after "charset" (7 characters) up to the end of
// the line and strip '=', '/', quotes, spaces and angle brackets.
strencoding = strtmp
.substring(inttmp + 7, strtmp.length())
.replace("=", "").replace("/", "")
.replace("\"", "").replace("\'", "")
.replace(" ", "").replace("<", "")
.replace(">", "");
// Stop at the first line that yields a charset.
break;
}
}
i++;
}
// mark() records the reader's current position, which at this point is
// wherever the first loop stopped, so the reset() that follows returns
// to that same position rather than to the start of the response.
reader.mark(0);
reader.reset();
// NOTE(review): the first reader is replaced here without being closed,
// and the new one wraps conn.getInputStream() again. Presumably the
// connection hands back the same underlying stream, so bytes already
// consumed (including any the first BufferedReader buffered ahead) would
// not be delivered a second time -- confirm.
reader = new BufferedReader(new InputStreamReader(
conn.getInputStream(), strencoding));
// Second pass: print every line this reader returns, prefixed with the
// running counter.
while ((lines = reader.readLine()) != null) {
System.out.println(i + " " + lines);
// The "icp" check currently does nothing; its body is commented out.
if (lines.toLowerCase().indexOf("icp") > 0) {
// System.out.println(i + " " + lines);
}
i++;
}
System.out.println(i + "---" + strencoding);
reader.close();
conn.disconnect();
} catch (Exception e2) {
e2.printStackTrace();
}
}
The problem is in the last `while` loop. In the first loop I check the page charset and `break`; then I reset the reader and call `readLine` again. But the second loop starts from the position where the first loop ended.
Sometimes it prints a result like this:
and sometimes the second loop does not read anything at all, like this:
So what is the problem?
Upvotes: 0
Views: 251
Reputation: 615
Finally, I found that the problem is in `conn.getInputStream()`: the stream has already been changed (partly consumed) after the first loop. So I modified the code to copy the input stream into memory first, and it works now:
/**
 * Downloads a hard-coded page, detects the character encoding declared in
 * its markup, and prints every line that contains "icp".
 *
 * <p>The response body is read from the network exactly once and kept in
 * memory, because the connection's input stream cannot be rewound. The
 * buffered bytes are decoded twice: first as UTF-8 to look for a
 * {@code charset} declaration, then with the detected encoding for the
 * actual scan. The line counter keeps running across both passes, as in
 * the earlier version of this code.
 *
 * <p>Every exception raised inside the method body is caught and its stack
 * trace is printed; the network stream and the connection are released in
 * all cases.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) throws HttpException, IOException,
        JSONException {
    HttpURLConnection conn = null;
    InputStream inputStream = null;
    try {
        URL murl = new URL(
                "http://www.baidu.com/link?url=NaethV_J2hSPVx_OdPlHk73964mU4LcwWkJmVUV4vIkuCXRf1y09ufRZVwkHJqSAa2mMSCoTLYVhGv2AyV_04_");
        conn = (HttpURLConnection) murl.openConnection();
        conn.setRequestProperty(
                "User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36");
        conn.setConnectTimeout(10000);
        conn.connect();
        conn.getResponseCode();
        // Print the URL the connection ended up at (after any redirects).
        System.out.println(conn.getURL().toString());

        // Read the body once; both passes below decode this same array.
        inputStream = conn.getInputStream();
        byte[] body = readBody(inputStream);

        String strencoding = "UTF-8";
        String lines;
        int i = 1;

        // Pass 1: look for a charset declaration. The whole body is scanned,
        // so the last valid declaration wins. Readers over an in-memory
        // array hold no system resources and need no close().
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(body), strencoding));
        while ((lines = reader.readLine()) != null) {
            String declared = charsetDeclaredIn(lines);
            if (declared != null) {
                System.out.println(lines);
                strencoding = declared;
            }
            i++;
        }

        // Pass 2: decode again with the detected encoding and report the
        // lines of interest.
        reader = new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(body), strencoding));
        while ((lines = reader.readLine()) != null) {
            // Locale.ROOT keeps the match independent of the default locale.
            if (lines.toLowerCase(java.util.Locale.ROOT).contains("icp")) {
                System.out.println(i + " " + lines);
            }
            i++;
        }
        System.out.println(i + "---" + strencoding);
    } catch (Exception e2) {
        e2.printStackTrace();
    } finally {
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the stream fails.
            }
        }
        if (conn != null) {
            conn.disconnect();
        }
    }
}

/** Reads {@code in} to end-of-stream and returns everything that was read. */
private static byte[] readBody(InputStream in) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    byte[] buffer = new byte[1024];
    int len;
    while ((len = in.read(buffer)) > -1) {
        baos.write(buffer, 0, len);
    }
    return baos.toByteArray();
}

/**
 * Returns the first supported charset name that follows the word "charset"
 * (matched case-insensitively) in {@code line}, or {@code null} if the line
 * declares none. Only the name token itself is returned, so trailing markup
 * on the same line cannot end up in the encoding name.
 */
private static String charsetDeclaredIn(String line) {
    final String keyword = "charset";
    final int length = line.length();
    for (int at = 0; at + keyword.length() <= length; at++) {
        if (!line.regionMatches(true, at, keyword, 0, keyword.length())) {
            continue;
        }
        int pos = at + keyword.length();
        // Skip the separators between the keyword and the name: = " ' space tab.
        while (pos < length && " \t=\"'".indexOf(line.charAt(pos)) >= 0) {
            pos++;
        }
        int start = pos;
        // Consume the characters that are legal in a charset name.
        while (pos < length) {
            char c = line.charAt(pos);
            boolean legal = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                    || (c >= '0' && c <= '9') || "-+.:_".indexOf(c) >= 0;
            if (!legal) {
                break;
            }
            pos++;
        }
        if (pos > start) {
            String name = line.substring(start, pos);
            try {
                if (java.nio.charset.Charset.isSupported(name)) {
                    return name;
                }
            } catch (java.nio.charset.IllegalCharsetNameException ignored) {
                // Not a well-formed charset name (e.g. starts with '-'); keep looking.
            }
        }
    }
    return null;
}
Upvotes: 0
Reputation: 76
You must call reader.mark() before the first while loop; reader.mark() essentially saves the current position of the reader so that you can go back to that position when you call reader.reset().
You will also not want to pass in 0 to reader.mark(). See the java spec for the parameter below:
readAheadLimit - Limit on the number of characters that may be read while still preserving the mark. An attempt to reset the stream after reading characters up to this limit or beyond may fail. A limit value larger than the size of the input buffer will cause a new buffer to be allocated whose size is no smaller than limit. Therefore large values should be used with care.
(In other words, passing in 0 will be useless. You need to pass in a number larger than the number of characters read in between mark() and reset()).
Upvotes: 1
Reputation: 18764
// Quoted from the question: replaces `reader` with a new BufferedReader that
// decodes conn.getInputStream() using strencoding.
reader = new BufferedReader(new InputStreamReader(
conn.getInputStream(), strencoding));
This line is causing your code to create a new reader, and that will cause the reader to start from the beginning.
Upvotes: 0