Skip to content

Workers

DefaultWorker

All data extraction happens here.

__init__(self, factory) special

Parameters:

Name Type Description Default
factory BaseFactory

see BaseFactory

required
Source code in extract_emails/workers/default_worker.py
def __init__(self, factory: BaseFactory):
    """Set up the worker from a factory's configuration.

    Every setting is read once from ``factory`` and stored on the worker
    under the same attribute name; the crawl state is then reset.

    Args:
        factory: see `BaseFactory`. Must expose ``website_url``,
            ``browser``, ``depth``, ``max_links_from_page``,
            ``link_filter`` and ``data_extractors``.
    """
    # Settings mirrored one-to-one from the factory.
    self.website_url = factory.website_url
    self.browser = factory.browser
    self.depth = factory.depth
    self.max_links_from_page = factory.max_links_from_page
    self.link_filter = factory.link_filter
    self.data_extractors = factory.data_extractors

    # Crawl state: the depth counter starts at zero and the link queue is
    # a list of lists seeded with a single group holding the start URL.
    # NOTE(review): presumably one inner list per crawl level -- confirm
    # against `_get_new_data`.
    self.current_depth = 0
    self.links = [[self.website_url]]

get_data(self)

Extract data from a given website

Source code in extract_emails/workers/default_worker.py
def get_data(self) -> List[PageData]:
    """Extract data from a given website.

    Repeatedly calls ``self._get_new_data()`` and accumulates whatever it
    returns. The loop ends when ``self.links`` is empty, or when a depth
    limit is set (``self.depth`` is not ``None``) and ``self.current_depth``
    has grown past it.

    Returns:
        Everything returned by the ``_get_new_data()`` calls, in call order.
    """
    collected: List[PageData] = []

    # NOTE(review): this method never shrinks `self.links` itself, so
    # termination without a depth limit presumably relies on
    # `_get_new_data` updating it -- confirm.
    while self.links:
        logger.debug(f"current_depth={self.current_depth}")

        depth_exceeded = (
            self.depth is not None and self.current_depth > self.depth
        )
        if depth_exceeded:
            break

        # Advance the counter before fetching, so the helper runs with the
        # already-incremented depth.
        self.current_depth += 1
        collected += self._get_new_data()

    return collected