Galerinha, tenho essa classe que me dá o código-fonte de uma página HTML. Como faço para retirar as tags e ficar apenas com o texto?
[code]package URL;
import java.net.URL;
import java.net.MalformedURLException;
import java.net.HttpURLConnection;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
/**
 * Downloads the body of an HTTP resource and prints it to standard output.
 *
 * <p>The target address is supplied once, at construction time, and is parsed
 * only when {@link #conectaURL()} is invoked.
 */
public class Lendo {
    /** Textual address of the resource to fetch. */
    private final String url;

    /**
     * Creates a reader for the given address.
     *
     * @param url textual URL of the page to download; it is not validated here
     */
    public Lendo(String url) {
        this.url = url;
    }

    /**
     * Performs an HTTP GET on the configured address, prints the response body
     * to {@code System.out}, and then prints a {@code "Resultado: code/message"}
     * line with the HTTP status.
     *
     * <p>Line breaks of the body are preserved (normalized to {@code '\n'}) so
     * that text on adjacent source lines is not fused together.
     *
     * @throws MalformedURLException if the configured address cannot be parsed as a URL
     * @throws IOException if connecting to the server or reading the body fails
     */
    public void conectaURL() throws MalformedURLException, IOException {
        URL url = new URL(this.url);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            // setRequestMethod selects the HTTP verb; a "Request-Method" request
            // property would merely be sent as an unrelated header.
            connection.setRequestMethod("GET");
            connection.setDoInput(true);
            connection.setDoOutput(false);
            connection.connect();

            StringBuilder newData = new StringBuilder(10000);
            // NOTE(review): the reader decodes with the platform default charset;
            // confirm whether the charset announced by the server should be used.
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(connection.getInputStream()));
            try {
                String s;
                while ((s = br.readLine()) != null) {
                    // readLine() strips the terminator, so put a line break back.
                    newData.append(s).append('\n');
                }
            } finally {
                // Close the stream even when reading fails midway.
                br.close();
            }

            System.out.println(newData.toString());
            System.out.println(
                    "Resultado: " + connection.getResponseCode() + "/"
                            + connection.getResponseMessage());
        } finally {
            // Release the underlying connection on success and on failure.
            connection.disconnect();
        }
    }
}
[/code]
Abraços