Książka ma {0} stron(y).
" PUBLISH_DATE_TEMPLATE = "Data pierwszego wydania: {0}
" PUBLISH_DATE_PL_TEMPLATE = ( "Data pierwszego wydania w Polsce: {0}
" ) def __init__(self, root: HtmlElement, metadata: Metadata) -> None: self.root = root self.metadata = metadata def parse_search_results(self) -> List[Dict]: matches = [] results = self.root.xpath(LubimyCzytac.BOOK_SEARCH_RESULT_XPATH) for result in results: title = self._parse_xpath_node( root=result, xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}" f"{LubimyCzytac.TITLE_TEXT_PATH}", ) book_url = self._parse_xpath_node( root=result, xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}" f"{LubimyCzytac.URL_PATH}", ) authors = self._parse_xpath_node( root=result, xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}" f"{LubimyCzytac.AUTHORS_PATH}", take_first=False, ) if not all([title, book_url, authors]): continue matches.append( { "id": book_url.replace(f"/ksiazka/", "").split("/")[0], "title": title, "authors": [strip_accents(author) for author in authors], "url": LubimyCzytac.BASE_URL + book_url, } ) return matches def parse_single_book(self, match: Dict, generic_cover: str) -> MetaRecord: response = requests.get(match.get("url")) self.root = fromstring(response.text) match["cover"] = self._parse_cover(generic_cover=generic_cover) match["description"] = self._parse_description() match["languages"] = self._parse_languages() match["publisher"] = self._parse_publisher() match["publishedDate"] = self._parse_from_summary( attribute_name="datePublished" ) match["rating"] = self._parse_rating() match["series"], match["series_index"] = self._parse_series() match["tags"] = self._parse_tags() match["source"] = { "id": self.metadata.__id__, "description": self.metadata.__name__, "link": LubimyCzytac.BASE_URL, } match["identifiers"] = { "isbn": self._parse_isbn(), "lubimyczytac": match["id"], } return match def _parse_xpath_node( self, xpath: str, root: HtmlElement = None, take_first: bool = True, strip_element: bool = True, ) -> Optional[Union[str, List[str]]]: root = root if root is not None else self.root node = root.xpath(xpath) if not node: return None return ( (node[0].strip() if strip_element else node[0]) if take_first else [x.strip() for x in node] ) def _parse_cover(self, generic_cover) -> Optional[str]: return ( self._parse_xpath_node(xpath=LubimyCzytac.COVER, take_first=True) or generic_cover ) def _parse_publisher(self) -> Optional[str]: return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True) def _parse_languages(self) -> List[str]: languages = list() lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True) if lang: if "polski" in lang: languages.append("Polish") if "angielski" in lang: languages.append("English") return languages def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]: series_index = 0 series = self._parse_xpath_node(xpath=LubimyCzytac.SERIES, take_first=True) if series: if "tom " in series: series_name, series_info = series.split(" (tom ", 1) series_info = series_info.replace(" ", "").replace(")", "") # Check if book is not a bundle, i.e. chapter 1-3 if "-" in series_info: series_info = series_info.split("-", 1)[0] if series_info.replace(".", "").isdigit() is True: series_index = get_int_or_float(series_info) return series_name, series_index return None, None def _parse_tags(self) -> List[str]: tags = self._parse_xpath_node(xpath=LubimyCzytac.TAGS, take_first=False) return [ strip_accents(w.replace(", itd.", " itd.")) for w in tags if isinstance(w, str) ] def _parse_from_summary(self, attribute_name: str) -> Optional[str]: value = None summary_text = self._parse_xpath_node(xpath=LubimyCzytac.SUMMARY) if summary_text: data = json.loads(summary_text) value = data.get(attribute_name) return value.strip() if value is not None else value def _parse_rating(self) -> Optional[str]: rating = self._parse_xpath_node(xpath=LubimyCzytac.RATING) return round(float(rating.replace(",", ".")) / 2) if rating else rating def _parse_date(self, xpath="first_publish") -> Optional[datetime.datetime]: options = { "first_publish": LubimyCzytac.FIRST_PUBLISH_DATE, "first_publish_pl": LubimyCzytac.FIRST_PUBLISH_DATE_PL, } date = self._parse_xpath_node(xpath=options.get(xpath)) return parser.parse(date) if date else None def _parse_isbn(self) -> Optional[str]: return self._parse_xpath_node(xpath=LubimyCzytac.ISBN) def _parse_description(self) -> str: description = "" description_node = self._parse_xpath_node( xpath=LubimyCzytac.DESCRIPTION, strip_element=False ) if description_node is not None: for source in self.root.xpath('//p[@class="source"]'): source.getparent().remove(source) description = tostring(description_node, method="html") description = sanitize_comments_html(description) else: description_node = self._parse_xpath_node(xpath=LubimyCzytac.META_TITLE) if description_node is not None: description = description_node description = sanitize_comments_html(description) description = self._add_extra_info_to_description(description=description) return description def _add_extra_info_to_description(self, description: str) -> str: pages = self._parse_from_summary(attribute_name="numberOfPages") if pages: description += LubimyCzytacParser.PAGES_TEMPLATE.format(pages) first_publish_date = self._parse_date() if first_publish_date: description += LubimyCzytacParser.PUBLISH_DATE_TEMPLATE.format( first_publish_date.strftime("%d.%m.%Y") ) first_publish_date_pl = self._parse_date(xpath="first_publish_pl") if first_publish_date_pl: description += LubimyCzytacParser.PUBLISH_DATE_PL_TEMPLATE.format( first_publish_date_pl.strftime("%d.%m.%Y") ) return description