import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.List;

/**
 * Fetches every URL listed in {@code ./urls.txt} through the proxy at
 * {@code proxy.zyte.com:8010} using Jsoup, and prints each page's
 * {@code <title>} text prefixed with the name of the worker thread.
 */
public class WebScraping {

    /** Proxy endpoint every request is routed through. */
    private static final String PROXY_HOST = "proxy.zyte.com";
    private static final int PROXY_PORT = 8010;

    /** Per-request timeout handed to Jsoup, in milliseconds (3 minutes). */
    private static final int TIMEOUT_MILLIS = 180000;

    /**
     * Reads {@code urls.txt} from the current directory (one URL per line) and
     * fetches the URLs in parallel. A failure on one URL is reported on stderr
     * and does not stop the remaining URLs from being processed.
     *
     * @param args unused
     * @throws Exception if {@code urls.txt} cannot be read
     */
    public static void main(String[] args) throws Exception {
        // Basic-auth credential in "user:password" form: the API key with an empty
        // password. Replace the placeholder with a real key before running.
        String authString = "<CRAWLERA_APIKEY>:";
        // Encode with an explicit charset so the header does not depend on the
        // platform default encoding.
        String encodedAuthString =
                Base64.getEncoder().encodeToString(authString.getBytes(StandardCharsets.UTF_8));

        final List<String> urls = Files.readAllLines(Paths.get(".", "urls.txt"));

        // NOTE(review): parallelStream() runs on the shared ForkJoinPool, which is sized
        // for CPU-bound work; a dedicated executor may suit long blocking requests better.
        urls.parallelStream()
                .map(String::trim)
                // Skip blank lines (e.g. a trailing newline in the file); Jsoup rejects
                // an empty URL with an unchecked exception.
                .filter(url -> !url.isEmpty())
                .forEach(url -> printTitle(url, encodedAuthString));
    }

    /**
     * Fetches a single URL through the proxy and prints its title to stdout.
     * Errors are reported on stderr together with the offending URL.
     *
     * @param url               the page to fetch
     * @param encodedAuthString Base64-encoded credential sent in the
     *                          {@code Proxy-Authorization} header
     */
    private static void printTitle(String url, String encodedAuthString) {
        try {
            final Document doc = Jsoup.connect(url)
                    .header("Proxy-Authorization", "Basic " + encodedAuthString)
                    .followRedirects(true)
                    .ignoreHttpErrors(true)
                    .ignoreContentType(true)
                    .timeout(TIMEOUT_MILLIS)
                    .proxy(PROXY_HOST, PROXY_PORT)
                    .get();
            final String title = doc.select("title").text();
            System.out.println(Thread.currentThread().getName() + ": " + title);
        } catch (IOException | IllegalArgumentException e) {
            // IllegalArgumentException covers malformed URL lines; without catching it a
            // single bad line would abort the whole parallel run.
            System.err.println(Thread.currentThread().getName() + ": failed to fetch " + url);
            e.printStackTrace();
        }
    }
}
Using Zyte Smart Proxy Manager with Java and Jsoup
Modified on: Wed, 5 Jan, 2022 at 1:39 PM
Did you find it helpful? Yes No
Send feedback. Sorry we couldn't be helpful. Help us improve this article with your feedback.