Skip to content

Browsers

PageSourceGetter

Bases: ABC

All browsers must inherit from this class

Source code in extract_emails/browsers/page_source_getter.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
class PageSourceGetter(ABC):
    """Abstract interface that every browser backend has to implement.

    Subclasses supply the lifecycle hooks (``start``/``stop`` and their
    async counterparts) together with the page fetchers. In return, this
    base class wires those hooks into the ``with`` and ``async with``
    protocols.
    """

    # ---- lifecycle hooks (implemented by subclasses) ----

    @abstractmethod
    def start(self) -> None:
        """Hook called by ``__enter__``; its meaning is up to the subclass."""

    @abstractmethod
    def stop(self) -> None:
        """Hook called by ``__exit__``; its meaning is up to the subclass."""

    @abstractmethod
    async def astart(self) -> None:
        """Hook awaited by ``__aenter__``; its meaning is up to the subclass."""

    @abstractmethod
    async def astop(self) -> None:
        """Hook awaited by ``__aexit__``; its meaning is up to the subclass."""

    # ---- page fetchers (implemented by subclasses) ----

    @abstractmethod
    def get_page_source(self, url: str) -> str:
        """Fetch the content behind a URL.

        Args:
            url: Address of the page to fetch.

        Returns:
            The page content (html, json, whatever).
        """

    @abstractmethod
    async def aget_page_source(self, url: str) -> str:
        """Fetch the content behind a URL asynchronously.

        Args:
            url: Address of the page to fetch.

        Returns:
            The page content (html, json, whatever).
        """

    # ---- synchronous context-manager protocol ----

    def __enter__(self) -> PageSourceGetter:
        """Call ``start()`` when a ``with`` block is entered.

        Returns:
            This same instance, so it can be bound with ``as``.
        """
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Call ``stop()`` when a ``with`` block is left.

        The exception details are accepted but not inspected, and nothing is
        returned, so an exception raised inside the block keeps propagating.

        Args:
            exc_type: Exception type
            exc_val: Exception value
            exc_tb: Exception traceback
        """
        self.stop()

    # ---- asynchronous context-manager protocol ----

    async def __aenter__(self) -> PageSourceGetter:
        """Await ``astart()`` when an ``async with`` block is entered.

        Returns:
            This same instance, so it can be bound with ``as``.
        """
        await self.astart()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Await ``astop()`` when an ``async with`` block is left.

        The exception details are accepted but not inspected, and nothing is
        returned, so an exception raised inside the block keeps propagating.

        Args:
            exc_type: Exception type
            exc_val: Exception value
            exc_tb: Exception traceback
        """
        await self.astop()

__aenter__() async

Async context manager enter method.

Returns:

Type Description
PageSourceGetter

Self instance for method chaining

Source code in extract_emails/browsers/page_source_getter.py
28
29
30
31
32
33
34
35
async def __aenter__(self) -> PageSourceGetter:
    """Await ``astart()`` when an ``async with`` block is entered.

    Returns:
        This same instance, so it can be bound with ``as``.
    """
    await self.astart()
    return self

__aexit__(exc_type, exc_val, exc_tb) async

Async context manager exit method.

Parameters:

Name Type Description Default
exc_type

Exception type

required
exc_val

Exception value

required
exc_tb

Exception traceback

required
Source code in extract_emails/browsers/page_source_getter.py
37
38
39
40
41
42
43
44
45
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
    """Await ``astop()`` when an ``async with`` block is left.

    The exception details are accepted but not inspected, and nothing is
    returned, so an exception raised inside the block keeps propagating.

    Args:
        exc_type: Exception type
        exc_val: Exception value
        exc_tb: Exception traceback
    """
    await self.astop()

__enter__()

Context manager enter method.

Returns:

Type Description
PageSourceGetter

Self instance for method chaining

Source code in extract_emails/browsers/page_source_getter.py
 9
10
11
12
13
14
15
16
def __enter__(self) -> PageSourceGetter:
    """Call ``start()`` when a ``with`` block is entered.

    Returns:
        This same instance, so it can be bound with ``as``.
    """
    self.start()
    return self

__exit__(exc_type, exc_val, exc_tb)

Context manager exit method.

Parameters:

Name Type Description Default
exc_type

Exception type

required
exc_val

Exception value

required
exc_tb

Exception traceback

required
Source code in extract_emails/browsers/page_source_getter.py
18
19
20
21
22
23
24
25
26
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
    """Call ``stop()`` when a ``with`` block is left.

    The exception details are accepted but not inspected, and nothing is
    returned, so an exception raised inside the block keeps propagating.

    Args:
        exc_type: Exception type
        exc_val: Exception value
        exc_tb: Exception traceback
    """
    self.stop()

aget_page_source(url) abstractmethod async

Return page content from a URL asynchronously

Parameters:

Name Type Description Default
url str

URL

required

Returns:

Type Description
str

page content (html, json, whatever)

Source code in extract_emails/browsers/page_source_getter.py
71
72
73
74
75
76
77
78
79
80
81
@abstractmethod
async def aget_page_source(self, url: str) -> str:
    """Fetch the content behind a URL asynchronously.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page content (html, json, whatever).
    """

get_page_source(url) abstractmethod

Return page content from a URL

Parameters:

Name Type Description Default
url str

URL

required

Returns:

Type Description
str

page content (html, json, whatever)

Source code in extract_emails/browsers/page_source_getter.py
59
60
61
62
63
64
65
66
67
68
69
@abstractmethod
def get_page_source(self, url: str) -> str:
    """Fetch the content behind a URL.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page content (html, json, whatever).
    """