SuumoBrowser.java (4658B)
1 package xyz.kebigon.housesearch.browser.suumo; 2 3 import java.time.LocalDate; 4 import java.util.ArrayList; 5 import java.util.Collection; 6 import java.util.stream.Collectors; 7 8 import org.openqa.selenium.By; 9 import org.openqa.selenium.WebElement; 10 11 import xyz.kebigon.housesearch.browser.Browser; 12 import xyz.kebigon.housesearch.domain.Posting; 13 import xyz.kebigon.housesearch.domain.SearchConditions; 14 import xyz.kebigon.housesearch.domain.SearchConditionsValidator; 15 import xyz.kebigon.housesearch.file.SentPostingsCache; 16 17 public class SuumoBrowser extends Browser 18 { 19 public Collection<Posting> search(SearchConditions conditions, SentPostingsCache sentPostings) 20 { 21 navigateTo(SuumoSearchURLBuilder.build(conditions)); 22 23 final Collection<Posting> postings = new ArrayList<Posting>(); 24 25 do 26 { 27 // Suumo detected us as a bot, we need to restart the browser 28 while (!findElements("//div[@class='l-error']").isEmpty()) 29 restartBrowser(); 30 31 postings.addAll(findElements("//div[@class='property_unit-content']").parallelStream() // 32 .map(SuumoBrowser::createPosting)// 33 .filter(sentPostings::notSent) // 34 .filter(posting -> SearchConditionsValidator.validateBasicConditions(posting, conditions)) // 35 .collect(Collectors.toList())); 36 } while (click("//a[text()='次へ']")); 37 38 return postings; 39 } 40 41 private static Posting createPosting(WebElement posting) 42 { 43 final String url = posting.findElement(By.xpath("./div[@class='property_unit-header']/h2/a")).getAttribute("href"); 44 45 final String priceField = getField(posting, "販売価格"); 46 final int tenthOfThousandsIndex = priceField.indexOf('万'); 47 final int currencyIndex = priceField.indexOf('円', tenthOfThousandsIndex); 48 long price = Long.parseLong(priceField.substring(0, tenthOfThousandsIndex)) * 10000; 49 if (tenthOfThousandsIndex + 1 != currencyIndex) 50 price += Long.parseLong(priceField.substring(tenthOfThousandsIndex + 1, currencyIndex)); 51 52 final String ageField = getField(posting, "築年月"); 53 final int ageSubstringIndex = ageField.indexOf("年"); 54 final int age = LocalDate.now().getYear() - Integer.parseInt(ageField.substring(0, ageSubstringIndex)); 55 56 final Double landSurface = parseSurfaceField(posting, "土地面積"); 57 final Double houseSurface = parseSurfaceField(posting, "建物面積"); 58 59 // JR常磐線「荒川沖」徒歩33分 60 final String stationField = getField(posting, "沿線・駅"); 61 62 final int walkTimeToStationSubstringIndex = stationField.indexOf("徒歩"); 63 Integer walkTimeToStation = null; 64 if (walkTimeToStationSubstringIndex != -1) 65 walkTimeToStation = Integer 66 .parseInt(stationField.substring(walkTimeToStationSubstringIndex + 2, stationField.indexOf("分", walkTimeToStationSubstringIndex))); 67 else 68 { 69 final int carIndex = stationField.indexOf("車"); 70 final int kmIndex = stationField.indexOf("km"); 71 72 if (carIndex != -1 && kmIndex != -1) 73 { 74 final double distanceToStation = Double.parseDouble(stationField.substring(carIndex + 1, kmIndex)); 75 // I'll take 1.43 m/s = 11.7 min/km as average walking speed 76 walkTimeToStation = (int) (distanceToStation * 11.7d); 77 } 78 } 79 80 final int stationOpenBracketIndex = stationField.indexOf('「'); 81 final int stationCloseBracketIndex = stationField.indexOf('」'); 82 final String station = stationOpenBracketIndex != -1 && stationCloseBracketIndex != -1 83 ? stationField.substring(stationOpenBracketIndex + 1, stationCloseBracketIndex) 84 : null; 85 86 return new Posting(url, price, age, landSurface, houseSurface, walkTimeToStation, station); 87 } 88 89 private static Double parseSurfaceField(WebElement posting, String fieldName) 90 { 91 final String surfaceField = getField(posting, fieldName); 92 93 int surfaceSubstringIndex = surfaceField.indexOf("m2"); 94 if (surfaceSubstringIndex == -1) 95 surfaceSubstringIndex = surfaceField.indexOf("㎡"); 96 97 return surfaceSubstringIndex != -1 ? // 98 Double.parseDouble(surfaceField.substring(0, surfaceSubstringIndex)) : // 99 null; 100 } 101 102 private static String getField(WebElement posting, String fieldName) 103 { 104 return posting.findElement(By.xpath(".//dl[dt='" + fieldName + "']/dd")).getText(); 105 } 106 }