o lance é: faço uma pesquisa no google e quero pegar os link’s do resultado…
tenho duas classes
import html.*;
public class Main {
public static void main(String args[]){
HtmlHandler hh = new HtmlHandler();
String [] links = hh.getLinks("http://www.google.com.br/search?hl=pt-BR&q=metalib&btnG=Pesquisa+Google&meta=");
for (int i = 0; i < links.length; i++){
System.out.println(links[i]);
}
}
}
import javax.swing.text.BadLocationException;
import javax.swing.text.SimpleAttributeSet;
import javax.swing.text.EditorKit;
import javax.swing.text.html.*;
public class HtmlHandler {
public String[] getLinks(String uriStr) {
List result = new ArrayList();
try {
// Create a reader on the HTML content
URL url = new URI(uriStr).toURL();
URLConnection conn = url.openConnection();
Reader rd = new InputStreamReader(conn.getInputStream());
// Parse the HTML
EditorKit kit = new HTMLEditorKit();
HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();
kit.read(rd, doc, 0);
// Find all the A elements in the HTML document
HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A);
while (it.isValid()) {
SimpleAttributeSet s = (SimpleAttributeSet) it.getAttributes();
String link = (String) s.getAttribute(HTML.Attribute.HREF);
if (link != null) {
// Add the link to the result list
result.add(link);
}
it.next();
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
} catch (BadLocationException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
// Return all found links
return (String[]) result.toArray(new String[result.size()]);
}
}
retorna a exceção
java.io.IOException: Server returned HTTP response code: 403 for URL: http://www.google.com.br/search?hl=pt-BR&q=metalib&btnG=Pesquisa+Google&meta=
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(Unknown Source)
at html.HtmlHandler.getLinks(HtmlHandler.java:21)
at Main.main(Main.java:6)

