commit ac70b239e06be73985c506df4f8542840d4311ad
parent 21c7a15aed8412c27ea974e1168e45456b913cd2
Author: Kebigon <git@kebigon.xyz>
Date: Fri, 13 Mar 2020 20:35:51 +0900
Work-around Suumo bot detection
Diffstat:
2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/src/main/java/xyz/kebigon/housesearch/browser/Browser.java b/src/main/java/xyz/kebigon/housesearch/browser/Browser.java
@@ -15,7 +15,7 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
public abstract class Browser implements Closeable
{
- private final WebDriver driver;
+ private WebDriver driver;
protected Browser()
{
@@ -34,6 +34,16 @@ public abstract class Browser implements Closeable
driver.navigate().to(url);
}
+ protected void restartBrowser() // Swaps the current driver for a new HtmlUnitDriver and reloads the URL it was on.
+ {
+ final String url = driver.getCurrentUrl(); // captured before the driver is quit below
+ log.info("Restarting browser, navigate to {}", url);
+
+ driver.quit(); // shut down the existing driver instance
+ driver = new HtmlUnitDriver(); // reassigns the field; its declaration is changed from final to non-final in this commit
+ driver.navigate().to(url); // return to the URL recorded at the top of the method
+ }
+
protected List<WebElement> findElements(String xpathExpression)
{
return driver.findElements(By.xpath(xpathExpression));
@@ -43,7 +53,9 @@ public abstract class Browser implements Closeable
{
try
{
- driver.findElement(By.xpath(xpathExpression)).click();
+ final WebElement element = driver.findElement(By.xpath(xpathExpression));
+ log.info("Click on {}, navigate to {}", element.getText(), element.getAttribute("href"));
+ element.click();
return true;
}
catch (final NoSuchElementException e)
diff --git a/src/main/java/xyz/kebigon/housesearch/browser/suumo/SuumoBrowser.java b/src/main/java/xyz/kebigon/housesearch/browser/suumo/SuumoBrowser.java
@@ -24,6 +24,10 @@ public class SuumoBrowser extends Browser
do
{
+ // Suumo detected us as a bot, we need to restart the browser
+ while (!findElements("//div[@class='l-error']").isEmpty())
+ restartBrowser();
+
postings.addAll(findElements("//div[@class='property_unit-content']").parallelStream() //
.map(SuumoBrowser::createPosting)//
.filter(sentPostings::notSent) //