suumo-search

Perform advanced searches on Suumo.jp
git clone https://git.neuralcrash.com/suumo-search.git
Log | Files | Refs | README

commit d63eb8426caed05ed258dbaa2889318c968004e0
parent bba5d14b40f25c8fd624cd0b71ebc262ab52aed5
Author: Kebigon <git@kebigon.xyz>
Date:   Sun,  8 Mar 2020 15:29:56 +0900

Rework the archive file so it contains the list of already sent postings

Diffstat:
M src/main/java/xyz/kebigon/housesearch/HouseSearchApplication.java | 71 ++++++++++++++++++++++++++++++++++++++++-------------------------------
M src/main/java/xyz/kebigon/housesearch/browser/suumo/SuumoBrowser.java | 6 +++---
M src/main/java/xyz/kebigon/housesearch/file/RoutesCache.java | 3 ++-
D src/main/java/xyz/kebigon/housesearch/file/SearchArchive.java | 63 ---------------------------------------------------------------
A src/main/java/xyz/kebigon/housesearch/file/SentPostingsCache.java | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 106 insertions(+), 98 deletions(-)

diff --git a/src/main/java/xyz/kebigon/housesearch/HouseSearchApplication.java b/src/main/java/xyz/kebigon/housesearch/HouseSearchApplication.java @@ -13,7 +13,7 @@ import xyz.kebigon.housesearch.browser.yahoo.transit.YahooTransitBrowser; import xyz.kebigon.housesearch.domain.Posting; import xyz.kebigon.housesearch.domain.SearchConditions; import xyz.kebigon.housesearch.domain.SearchConditionsValidator; -import xyz.kebigon.housesearch.file.SearchArchive; +import xyz.kebigon.housesearch.file.SentPostingsCache; import xyz.kebigon.housesearch.mail.EmailSender; @Slf4j @@ -22,51 +22,60 @@ public class HouseSearchApplication public static void main(String[] args) throws IOException, EmailException { final SearchConditions conditions = SearchConditions.load(); + final SentPostingsCache sentPostings = SentPostingsCache.load(); - Collection<Posting> postings; - - try (final SearchArchive archive = new SearchArchive()) + try { + Collection<Posting> postings; + try (final SuumoBrowser suumo = new SuumoBrowser()) { - postings = suumo.search(conditions, archive); + postings = suumo.search(conditions, sentPostings); } - } - - if (postings.isEmpty()) - { - log.info("No postings found on Suumo, terminating"); - return; - } - if (!StringUtils.isEmpty(conditions.getExpression())) - { - try (final YahooTransitBrowser yahooTransit = new YahooTransitBrowser()) + if (postings.isEmpty()) { - ApplicationContext.setYahooTransitBrowser(yahooTransit); - - postings = postings.stream() // Do not parallel here - .filter(property -> SearchConditionsValidator.validateExpression(property, conditions)).collect(Collectors.toList()); + log.info("No postings found on Suumo, terminating"); + return; } - if (postings.isEmpty()) + if (!StringUtils.isEmpty(conditions.getExpression())) { - log.info("No postings left after applying expression filter, terminating"); - return; + try (final YahooTransitBrowser yahooTransit = new YahooTransitBrowser()) + { + 
ApplicationContext.setYahooTransitBrowser(yahooTransit); + + postings = postings.stream() // Do not parallel here + .filter(property -> SearchConditionsValidator.validateExpression(property, conditions)).collect(Collectors.toList()); + } + + if (postings.isEmpty()) + { + log.info("No postings left after applying expression filter, terminating"); + return; + } } - } - log.info("=======[ RESULTS ]======="); - log.info("Found {} postings", postings.size()); + log.info("=======[ RESULTS ]======="); + log.info("Found {} postings", postings.size()); - for (final Posting posting : postings) - log.info("-> {}", posting); + for (final Posting posting : postings) + log.info("-> {}", posting); - log.info("Sending email notification"); + log.info("Sending email notification"); - final EmailSender sender = new EmailSender(); - sender.send(postings); + final EmailSender sender = new EmailSender(); + sender.send(postings); - log.info("Email notification sent, terminating"); + // Register sent postings + postings.forEach(sentPostings::add); + + log.info("Email notification sent, terminating"); + + } + finally + { + sentPostings.save(); + } } } diff --git a/src/main/java/xyz/kebigon/housesearch/browser/suumo/SuumoBrowser.java b/src/main/java/xyz/kebigon/housesearch/browser/suumo/SuumoBrowser.java @@ -12,11 +12,11 @@ import xyz.kebigon.housesearch.browser.Browser; import xyz.kebigon.housesearch.domain.Posting; import xyz.kebigon.housesearch.domain.SearchConditions; import xyz.kebigon.housesearch.domain.SearchConditionsValidator; -import xyz.kebigon.housesearch.file.SearchArchive; +import xyz.kebigon.housesearch.file.SentPostingsCache; public class SuumoBrowser extends Browser { - public Collection<Posting> search(SearchConditions conditions, SearchArchive archive) + public Collection<Posting> search(SearchConditions conditions, SentPostingsCache sentPostings) { navigateTo(SuumoSearchURLBuilder.build(conditions)); @@ -26,7 +26,7 @@ public class SuumoBrowser extends Browser { 
postings.addAll(findElements("//div[@class='property_unit-content']").parallelStream() // .map(SuumoBrowser::createPosting)// - .filter(archive::filter) // + .filter(sentPostings::notSent) // .filter(posting -> SearchConditionsValidator.validateBasicConditions(posting, conditions)) // .collect(Collectors.toList())); } while (click("//a[text()='次へ']")); diff --git a/src/main/java/xyz/kebigon/housesearch/file/RoutesCache.java b/src/main/java/xyz/kebigon/housesearch/file/RoutesCache.java @@ -11,6 +11,7 @@ import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.AccessLevel; import lombok.Data; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -18,7 +19,7 @@ import xyz.kebigon.housesearch.domain.Route; @Slf4j @Data -@NoArgsConstructor +@NoArgsConstructor(access = AccessLevel.PRIVATE) public class RoutesCache { private static final File ROUTES_CACHE_FILE = new File("var/state/routes-cache.json"); diff --git a/src/main/java/xyz/kebigon/housesearch/file/SearchArchive.java b/src/main/java/xyz/kebigon/housesearch/file/SearchArchive.java @@ -1,63 +0,0 @@ -package xyz.kebigon.housesearch.file; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.Closeable; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Writer; -import java.util.Collection; -import java.util.HashSet; - -import lombok.extern.slf4j.Slf4j; -import xyz.kebigon.housesearch.domain.Posting; - -@Slf4j -public class SearchArchive implements Closeable -{ - private static final File ARCHIVE_FILE = new File("var/state/archive"); - static - { - ARCHIVE_FILE.getParentFile().mkdirs(); - } - - private final Collection<String> urls = new HashSet<String>(); - - public SearchArchive() throws IOException - { - if (ARCHIVE_FILE.exists()) - { - try (final BufferedReader reader = new 
BufferedReader(new FileReader(ARCHIVE_FILE))) - { - String line; - while ((line = reader.readLine()) != null) - urls.add(line); - } - - log.info("Loaded {} urls from {}", urls.size(), ARCHIVE_FILE.getAbsolutePath()); - } - } - - public boolean filter(Posting property) - { - if (urls.contains(property.getUrl())) - return false; - - urls.add(property.getUrl()); - return true; - } - - @Override - public void close() throws IOException - { - try (final Writer writer = new BufferedWriter(new FileWriter(ARCHIVE_FILE))) - { - for (final String url : urls) - writer.write(url + '\n'); - } - - log.info("Saved {} urls to {}", urls.size(), ARCHIVE_FILE.getAbsolutePath()); - } -} diff --git a/src/main/java/xyz/kebigon/housesearch/file/SentPostingsCache.java b/src/main/java/xyz/kebigon/housesearch/file/SentPostingsCache.java @@ -0,0 +1,61 @@ +package xyz.kebigon.housesearch.file; + +import java.io.File; +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; + +import com.fasterxml.jackson.core.JsonGenerationException; +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import lombok.AccessLevel; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import xyz.kebigon.housesearch.domain.Posting; + +@Slf4j +@Data +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class SentPostingsCache +{ + private static final File SENT_POSTINGS_FILE = new File("var/state/sent-postings.json"); + static + { + SENT_POSTINGS_FILE.getParentFile().mkdirs(); + } + + private final Collection<String> urls = new HashSet<String>(); + + public void add(Posting posting) + { + urls.add(posting.getUrl()); + } + + public boolean notSent(Posting posting) + { + return !urls.contains(posting.getUrl()); + } + + public void save() throws JsonGenerationException, JsonMappingException, IOException + { + log.info("Saving {} 
sent postings to {}", urls.size(), SENT_POSTINGS_FILE.getAbsolutePath()); + final ObjectMapper mapper = new ObjectMapper(); + mapper.writeValue(SENT_POSTINGS_FILE, this); + } + + public static SentPostingsCache load() throws JsonParseException, JsonMappingException, IOException + { + if (SENT_POSTINGS_FILE.exists()) + { + log.info("Loading sent postings cache from {}", SENT_POSTINGS_FILE.getAbsolutePath()); + + final ObjectMapper mapper = new ObjectMapper(); + return mapper.readValue(SENT_POSTINGS_FILE, SentPostingsCache.class); + } + else + return new SentPostingsCache(); + } +}