suumo-search

Perform advanced searches on Suumo.jp
git clone https://git.neuralcrash.com/suumo-search.git
Log | Files | Refs | README

SuumoBrowser.java (4658B)


      1 package xyz.kebigon.housesearch.browser.suumo;
      2 
      3 import java.time.LocalDate;
      4 import java.util.ArrayList;
      5 import java.util.Collection;
      6 import java.util.stream.Collectors;
      7 
      8 import org.openqa.selenium.By;
      9 import org.openqa.selenium.WebElement;
     10 
     11 import xyz.kebigon.housesearch.browser.Browser;
     12 import xyz.kebigon.housesearch.domain.Posting;
     13 import xyz.kebigon.housesearch.domain.SearchConditions;
     14 import xyz.kebigon.housesearch.domain.SearchConditionsValidator;
     15 import xyz.kebigon.housesearch.file.SentPostingsCache;
     16 
     17 public class SuumoBrowser extends Browser
     18 {
     19     public Collection<Posting> search(SearchConditions conditions, SentPostingsCache sentPostings)
     20     {
     21         navigateTo(SuumoSearchURLBuilder.build(conditions));
     22 
     23         final Collection<Posting> postings = new ArrayList<Posting>();
     24 
     25         do
     26         {
     27             // Suumo detected us as a bot, we need to restart the browser
     28             while (!findElements("//div[@class='l-error']").isEmpty())
     29                 restartBrowser();
     30 
     31             postings.addAll(findElements("//div[@class='property_unit-content']").parallelStream() //
     32                     .map(SuumoBrowser::createPosting)//
     33                     .filter(sentPostings::notSent) //
     34                     .filter(posting -> SearchConditionsValidator.validateBasicConditions(posting, conditions)) //
     35                     .collect(Collectors.toList()));
     36         } while (click("//a[text()='次へ']"));
     37 
     38         return postings;
     39     }
     40 
     41     private static Posting createPosting(WebElement posting)
     42     {
     43         final String url = posting.findElement(By.xpath("./div[@class='property_unit-header']/h2/a")).getAttribute("href");
     44 
     45         final String priceField = getField(posting, "販売価格");
     46         final int tenthOfThousandsIndex = priceField.indexOf('万');
     47         final int currencyIndex = priceField.indexOf('円', tenthOfThousandsIndex);
     48         long price = Long.parseLong(priceField.substring(0, tenthOfThousandsIndex)) * 10000;
     49         if (tenthOfThousandsIndex + 1 != currencyIndex)
     50             price += Long.parseLong(priceField.substring(tenthOfThousandsIndex + 1, currencyIndex));
     51 
     52         final String ageField = getField(posting, "築年月");
     53         final int ageSubstringIndex = ageField.indexOf("年");
     54         final int age = LocalDate.now().getYear() - Integer.parseInt(ageField.substring(0, ageSubstringIndex));
     55 
     56         final Double landSurface = parseSurfaceField(posting, "土地面積");
     57         final Double houseSurface = parseSurfaceField(posting, "建物面積");
     58 
     59         // JR常磐線「荒川沖」徒歩33分
     60         final String stationField = getField(posting, "沿線・駅");
     61 
     62         final int walkTimeToStationSubstringIndex = stationField.indexOf("徒歩");
     63         Integer walkTimeToStation = null;
     64         if (walkTimeToStationSubstringIndex != -1)
     65             walkTimeToStation = Integer
     66                     .parseInt(stationField.substring(walkTimeToStationSubstringIndex + 2, stationField.indexOf("分", walkTimeToStationSubstringIndex)));
     67         else
     68         {
     69             final int carIndex = stationField.indexOf("車");
     70             final int kmIndex = stationField.indexOf("km");
     71 
     72             if (carIndex != -1 && kmIndex != -1)
     73             {
     74                 final double distanceToStation = Double.parseDouble(stationField.substring(carIndex + 1, kmIndex));
     75                 // I'll take 1.43 m/s = 11.7 min/km as average walking speed
     76                 walkTimeToStation = (int) (distanceToStation * 11.7d);
     77             }
     78         }
     79 
     80         final int stationOpenBracketIndex = stationField.indexOf('「');
     81         final int stationCloseBracketIndex = stationField.indexOf('」');
     82         final String station = stationOpenBracketIndex != -1 && stationCloseBracketIndex != -1
     83                 ? stationField.substring(stationOpenBracketIndex + 1, stationCloseBracketIndex)
     84                 : null;
     85 
     86         return new Posting(url, price, age, landSurface, houseSurface, walkTimeToStation, station);
     87     }
     88 
     89     private static Double parseSurfaceField(WebElement posting, String fieldName)
     90     {
     91         final String surfaceField = getField(posting, fieldName);
     92 
     93         int surfaceSubstringIndex = surfaceField.indexOf("m2");
     94         if (surfaceSubstringIndex == -1)
     95             surfaceSubstringIndex = surfaceField.indexOf("㎡");
     96 
     97         return surfaceSubstringIndex != -1 ? //
     98                 Double.parseDouble(surfaceField.substring(0, surfaceSubstringIndex)) : //
     99                 null;
    100     }
    101 
    102     private static String getField(WebElement posting, String fieldName)
    103     {
    104         return posting.findElement(By.xpath(".//dl[dt='" + fieldName + "']/dd")).getText();
    105     }
    106 }