Skip to content

cocorum.scraping

The primary use of this module is the Scraper class, used for getting data from Rumble web pages, or in the rare case where HTML is passed by one of the API's endpoints. You must first create an instance of cocorum.servicephp.ServicePHP(), and then pass it to this class upon initialization. All other classes are supporting sub-classes.

Scraping for Cocorum

Classes and utilities for extracting data from HTML, including that returned by the API. S.D.G.

HTMLChannel

Bases: HTMLObj

Channel under a user as extracted from their channels page

Source code in cocorum/scraping.py
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
class HTMLChannel(HTMLObj):
    """A channel owned by a user, as scraped from that user's channels page"""

    def __str__(self):
        """String form of the channel, which is its slug"""
        return self.slug

    def __int__(self):
        """Integer form of the channel, which is its base 10 numeric ID"""
        return self.channel_id_b10

    def __eq__(self, other):
        """Compare this channel with an ID, a slug, or a channel-like object.

        Args:
            other (int, str, HTMLChannel): Object to compare to.

        Returns:
            Comparison (bool, None): Whether they match, or None when no
                comparison rule applies to `other`.
        """

        #Stays None if none of the rules below apply
        verdict = None

        if isinstance(other, int):
            #A bare integer is compared with our base 10 ID
            verdict = self.channel_id_b10 == other
        elif isinstance(other, str):
            #A string may be either our slug or our base 36 ID
            candidates = (self.slug, self.channel_id_b36)
            verdict = str(other) in candidates
        elif hasattr(other, "channel_id"):
            #Compare by ID, passing the other ID through utils.ensure_b10 first
            verdict = self.channel_id_b10 == utils.ensure_b10(other.channel_id)
        elif hasattr(other, "slug"):
            verdict = self.slug == other.slug
        elif hasattr(other, "__int__"):
            #Integer conversion is tried last, since an unrelated object's
            #integer form could happen to equal our ID
            verdict = self.channel_id_b10 == int(other)

        return verdict

    @property
    def slug(self):
        """The unique string ID of the channel (the "data-slug" attribute)"""
        return self["data-slug"]

    @property
    def channel_id(self):
        """The numeric ID of the channel in base 10 (the "data-id" attribute as an int)"""
        raw_id = self["data-id"]
        return int(raw_id)

    @property
    def channel_id_b10(self):
        """The numeric ID of the channel in base 10 (alias of channel_id)"""
        return self.channel_id

    @property
    def channel_id_b36(self):
        """The numeric ID of the channel in base 36"""
        base_10_id = self.channel_id
        return utils.base_10_to_36(base_10_id)

    @property
    def title(self):
        """The title of the channel (the "data-title" attribute)"""
        return self["data-title"]

channel_id property

The numeric ID of the channel in base 10

channel_id_b10 property

The numeric ID of the channel in base 10

channel_id_b36 property

The numeric ID of the channel in base 36

slug property

The unique string ID of the channel

title property

The title of the channel

__eq__(other)

Determine if this channel is equal to another.

Parameters:

Name Type Description Default
other (int, str, HTMLChannel)

Object to compare to.

required

Returns:

Name Type Description
Comparison (bool, None)

Did it fit the criteria?

Source code in cocorum/scraping.py
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
def __eq__(self, other):
    """Compare this channel with an ID, a slug, or a channel-like object.

    Args:
        other (int, str, HTMLChannel): Object to compare to.

    Returns:
        Comparison (bool, None): Whether they match, or None when no
            comparison rule applies to `other`.
    """

    #Stays None if none of the rules below apply
    verdict = None

    if isinstance(other, int):
        #A bare integer is compared with our base 10 ID
        verdict = self.channel_id_b10 == other
    elif isinstance(other, str):
        #A string may be either our slug or our base 36 ID
        candidates = (self.slug, self.channel_id_b36)
        verdict = str(other) in candidates
    elif hasattr(other, "channel_id"):
        #Compare by ID, passing the other ID through utils.ensure_b10 first
        verdict = self.channel_id_b10 == utils.ensure_b10(other.channel_id)
    elif hasattr(other, "slug"):
        verdict = self.slug == other.slug
    elif hasattr(other, "__int__"):
        #Integer conversion is tried last, since an unrelated object's
        #integer form could happen to equal our ID
        verdict = self.channel_id_b10 == int(other)

    return verdict

__int__()

The channel as an integer (its numeric ID)

Source code in cocorum/scraping.py
394
395
396
def __int__(self):
    """Integer form of the channel, which is its base 10 numeric ID"""
    numeric_id = self.channel_id_b10
    return numeric_id

__str__()

The channel as a string (its slug)

Source code in cocorum/scraping.py
390
391
392
def __str__(self):
    """String form of the channel, which is its slug"""
    channel_slug = self.slug
    return channel_slug

HTMLComment

Bases: HTMLObj

A comment on a video as returned by service.php comment.list

Source code in cocorum/scraping.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
class HTMLComment(HTMLObj):
    """A comment on a video as returned by service.php comment.list"""
    def __init__(self, elem):
        """A comment on a video as returned by service.php comment.list

        Args:
            elem (bs4.Tag): The <li> element of the comment.
        """

        super().__init__(elem)

        #Collect the commenter's badges (if any), keyed by badge slug
        badges_by_slug = {}
        badge_elems = self._elem.find_all("li", attrs = {"class" : "comments-meta-user-badge"})
        for badge_elem in badge_elems:
            badge = HTMLUserBadge(badge_elem)
            badges_by_slug[badge.slug] = badge
        self.user_badges = badges_by_slug

    def __int__(self):
        """Integer form of the comment, which is its ID"""
        return self.comment_id

    def __str__(self):
        """String form of the comment, which is its text"""
        return self.text

    def __eq__(self, other):
        """Compare this comment with an ID, a text, or a comment-like object.

        Args:
            other (int, str, HTMLComment): Object to compare to.

        Returns:
            Comparison (bool, None): Whether they match, or None when no
                comparison rule applies to `other`.
        """

        #Stays None if none of the rules below apply
        verdict = None

        if isinstance(other, int):
            #A bare integer is compared with our base 10 ID
            verdict = self.comment_id_b10 == other
        elif isinstance(other, str):
            #A string is compared with our text
            verdict = str(self) == other
        elif hasattr(other, "comment_id"):
            #Compare by ID, passing the other ID through utils.ensure_b10 first
            verdict = self.comment_id_b10 == utils.ensure_b10(other.comment_id)
        elif hasattr(other, "__int__"):
            #Integer conversion is the last resort
            verdict = self.comment_id_b10 == int(other)

        return verdict

    @property
    def is_first(self):
        """Is this comment the first one?"""
        css_classes = self["class"]
        return "comment-item-first" in css_classes

    @property
    def comment_id(self):
        """The numeric ID of the comment in base 10"""
        raw_id = self["data-comment-id"]
        return int(raw_id)

    @property
    def comment_id_b10(self):
        """The base 10 ID of the comment (alias of comment_id)"""
        return self.comment_id

    @property
    def comment_id_b36(self):
        """The base 36 ID of the comment"""
        base_10_id = self.comment_id
        return utils.base_10_to_36(base_10_id)

    @property
    def text(self):
        """The text of the comment"""
        text_elem = self._elem.find("p", attrs = {"class" : "comment-text"})
        return text_elem.string

    @property
    def username(self):
        """The name of the user who commented"""
        return self["data-username"]

    @property
    def entity_type(self):
        """Whether the comment was made by a user or a channel"""
        return self["data-entity-type"]

    @property
    def video_id(self):
        """The base 10 ID of the video the comment was posted on"""
        #NOTE(review): unlike comment_id, this returns the raw attribute
        #value without int() conversion - confirm that is intended
        return self["data-video-fid"]

    @property
    def video_id_b10(self):
        """The base 10 ID of the video the comment was posted on (alias of video_id)"""
        return self.video_id

    @property
    def video_id_b36(self):
        """The base 36 ID of the video the comment was posted on"""
        base_10_id = self.video_id
        return utils.base_10_to_36(base_10_id)

    @property
    def actions(self):
        """Allowed actions on this comment based on the login used to retrieve it"""
        #The attribute holds a comma-separated list
        raw_actions = self["data-actions"]
        return raw_actions.split(",")

    @property
    def rumbles(self):
        """The votes on this comment"""
        votes_elem = self._elem.find("div", attrs = {"class" : "rumbles-vote"})
        return HTMLContentVotes(votes_elem)

actions property

Allowed actions on this comment based on the login used to retrieve it

comment_id property

The numeric ID of the comment in base 10

comment_id_b10 property

The base 10 ID of the comment

comment_id_b36 property

The base 36 ID of the comment

entity_type property

Whether the comment was made by a user or a channel

is_first property

Is this comment the first one?

rumbles property

The votes on this comment

text property

The text of the comment

username property

The name of the user who commented

video_id property

The base 10 ID of the video the comment was posted on

video_id_b10 property

The base 10 ID of the video the comment was posted on

video_id_b36 property

The base 36 ID of the video the comment was posted on

__eq__(other)

Determine if this comment is equal to another.

Parameters:

Name Type Description Default
other (int, str, HTMLComment)

Object to compare to.

required

Returns:

Name Type Description
Comparison (bool, None)

Did it fit the criteria?

Source code in cocorum/scraping.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def __eq__(self, other):
    """Compare this comment with an ID, a text, or a comment-like object.

    Args:
        other (int, str, HTMLComment): Object to compare to.

    Returns:
        Comparison (bool, None): Whether they match, or None when no
            comparison rule applies to `other`.
    """

    #Stays None if none of the rules below apply
    verdict = None

    if isinstance(other, int):
        #A bare integer is compared with our base 10 ID
        verdict = self.comment_id_b10 == other
    elif isinstance(other, str):
        #A string is compared with our text
        verdict = str(self) == other
    elif hasattr(other, "comment_id"):
        #Compare by ID, passing the other ID through utils.ensure_b10 first
        verdict = self.comment_id_b10 == utils.ensure_b10(other.comment_id)
    elif hasattr(other, "__int__"):
        #Integer conversion is the last resort
        verdict = self.comment_id_b10 == int(other)

    return verdict

__init__(elem)

A comment on a video as returned by service.php comment.list

Parameters:

Name Type Description Default
elem Tag

The <li> element of the comment.

required
    Source code in cocorum/scraping.py
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    def __init__(self, elem):
        """A comment on a video as returned by service.php comment.list

        Args:
            elem (bs4.Tag): The <li> element of the comment.
        """

        super().__init__(elem)

        #Collect the commenter's badges (if any), keyed by badge slug
        badges_by_slug = {}
        badge_elems = self._elem.find_all("li", attrs = {"class" : "comments-meta-user-badge"})
        for badge_elem in badge_elems:
            badge = HTMLUserBadge(badge_elem)
            badges_by_slug[badge.slug] = badge
        self.user_badges = badges_by_slug
    

    __int__()

    The comment in integer form (its ID)

    Source code in cocorum/scraping.py
    104
    105
    106
    def __int__(self):
        """Integer form of the comment, which is its ID"""
        numeric_id = self.comment_id
        return numeric_id
    

    __str__()

    The comment as a string (its text)

    Source code in cocorum/scraping.py
    108
    109
    110
    def __str__(self):
        """String form of the comment, which is its text"""
        comment_text = self.text
        return comment_text
    

    HTMLContentVotes

    Bases: HTMLObj

    Votes made on content

    Source code in cocorum/scraping.py
    196
    197
    198
    199
    200
    201
    202
    203
    204
    205
    206
    207
    208
    209
    210
    211
    212
    213
    214
    215
    216
    217
    218
    219
    220
    221
    222
    223
    224
    225
    226
    227
    228
    229
    230
    231
    232
    233
    234
    235
    236
    237
    238
    239
    240
    241
    242
    243
    244
    245
    246
    247
    class HTMLContentVotes(HTMLObj):
        """Votes made on a piece of content"""

        def __int__(self):
            """Integer form of the votes, which is the summed score"""
            return self.score

        def __str__(self):
            """String form of the votes, which is the summed score as text"""
            current_score = self.score
            return str(current_score)

        def __eq__(self, other):
            """Compare these votes with a score or a votes-like object.

            Args:
                other (int, str, HTMLContentVotes): Object to compare to.

            Returns:
                Comparison (bool, None): Whether they match, or None when no
                    comparison rule applies to `other`.
            """

            #Stays None if none of the rules below apply
            verdict = None

            if isinstance(other, int):
                #A bare integer is compared with our score
                verdict = self.score == other
            elif isinstance(other, str):
                #A string is compared with our string form
                verdict = str(self) == other
            elif hasattr(other, "score"):
                #Only the score is compared; content_id and content_type
                #are not taken into account here
                verdict = self.score == other.score
            elif hasattr(other, "__int__"):
                #Integer conversion is the last resort
                verdict = self.score == int(other)

            return verdict

        @property
        def score(self):
            """Summed score of the content"""
            count_elem = self._elem.find("span", attrs = {"class" : "rumbles-count"})
            return int(count_elem.string)

        @property
        def content_type(self):
            """The type of content being voted on (the "data-type" attribute as an int)"""
            raw_type = self["data-type"]
            return int(raw_type)

        @property
        def content_id(self):
            """The numerical ID of the content being voted on (the "data-id" attribute as an int)"""
            raw_id = self["data-id"]
            return int(raw_id)
    

    content_id property

    The numerical ID of the content being voted on

    content_type property

    The type of content being voted on

    score property

    Summed score of the content

    __eq__(other)

    Determine if this content votes is equal to another.

    Parameters:

    Name Type Description Default
    other (int, str, HTMLContentVotes)

    Object to compare to.

    required

    Returns:

    Name Type Description
    Comparison (bool, None)

    Did it fit the criteria?

    Source code in cocorum/scraping.py
    208
    209
    210
    211
    212
    213
    214
    215
    216
    217
    218
    219
    220
    221
    222
    223
    224
    225
    226
    227
    228
    229
    230
    231
    232
    def __eq__(self, other):
        """Compare these votes with a score or a votes-like object.

        Args:
            other (int, str, HTMLContentVotes): Object to compare to.

        Returns:
            Comparison (bool, None): Whether they match, or None when no
                comparison rule applies to `other`.
        """

        #Stays None if none of the rules below apply
        verdict = None

        if isinstance(other, int):
            #A bare integer is compared with our score
            verdict = self.score == other
        elif isinstance(other, str):
            #A string is compared with our string form
            verdict = str(self) == other
        elif hasattr(other, "score"):
            #Only the score is compared; content_id and content_type
            #are not taken into account here
            verdict = self.score == other.score
        elif hasattr(other, "__int__"):
            #Integer conversion is the last resort
            verdict = self.score == int(other)

        return verdict
    

    __int__()

    The integer form of the content votes

    Source code in cocorum/scraping.py
    199
    200
    201
    def __int__(self):
        """Integer form of the votes, which is the summed score"""
        current_score = self.score
        return current_score
    

    __str__()

    The string form of the content votes

    Source code in cocorum/scraping.py
    203
    204
    205
    206
    def __str__(self):
        """String form of the votes, which is the summed score as text"""
        current_score = self.score
        return str(current_score)
    

    HTMLObj

    Abstract object scraped from bs4 HTML

    Source code in cocorum/scraping.py
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    class HTMLObj:
        """Base wrapper around a bs4 HTML element"""
        def __init__(self, elem):
            """Wrap a BeautifulSoup element.

            Args:
                elem (bs4.Tag): The BeautifulSoup element to base our data on.
            """

            #Keep the element; subclasses read everything from it
            self._elem = elem

        def __getitem__(self, key):
            """Look up one of the wrapped element's attributes.

            Args:
                key (str): A valid attribute name.

            Returns:
                The value stored under `key` in the element's `attrs` mapping.
            """

            attributes = self._elem.attrs
            return attributes[key]
    

    __getitem__(key)

    Get a key from the element attributes

    Parameters:

    Name Type Description Default
    key str

    A valid attribute name.

    required
    Source code in cocorum/scraping.py
    23
    24
    25
    26
    27
    28
    29
    30
    def __getitem__(self, key):
        """Get a key from the element attributes
    
    Args:
        key (str): A valid attribute name.
        """
    
        return self._elem.attrs[key]
    

    __init__(elem)

    Abstract object scraped from bs4 HTML

    Parameters:

    Name Type Description Default
    elem Tag

    The BeautifulSoup element to base our data on.

    required
    Source code in cocorum/scraping.py
    14
    15
    16
    17
    18
    19
    20
    21
    def __init__(self, elem):
        """Abstract object scraped from bs4 HTML
    
    Args:
        elem (bs4.Tag): The BeautifulSoup element to base our data on.
        """
    
        self._elem = elem
    

    HTMLPlaylist

    Bases: HTMLObj

    A playlist as obtained from HTML data

    Source code in cocorum/scraping.py
    249
    250
    251
    252
    253
    254
    255
    256
    257
    258
    259
    260
    261
    262
    263
    264
    265
    266
    267
    268
    269
    270
    271
    272
    273
    274
    275
    276
    277
    278
    279
    280
    281
    282
    283
    284
    285
    286
    287
    288
    289
    290
    291
    292
    293
    294
    295
    296
    297
    298
    299
    300
    301
    302
    303
    304
    305
    306
    307
    308
    309
    310
    311
    312
    313
    314
    315
    316
    317
    318
    319
    320
    321
    322
    323
    324
    325
    326
    327
    328
    329
    330
    331
    332
    333
    334
    335
    336
    337
    338
    339
    340
    341
    342
    343
    344
    345
    346
    347
    348
    349
    350
    351
    352
    353
    354
    355
    356
    357
    358
    359
    360
    361
    362
    363
    364
    365
    366
    367
    368
    369
    370
    371
    372
    373
    374
    375
    376
    377
    378
    379
    380
    381
    382
    383
    384
    385
    class HTMLPlaylist(HTMLObj):
        """A playlist as obtained from HTML data"""
        def __init__(self, elem, scraper):
            """A playlist as obtained from HTML data.

            Args:
                elem (bs4.Tag): The playlist class = "thumbnail__grid-item" element.
                scraper (Scraper): The HTML scraper object that spawned us.
            """

            super().__init__(elem)

            #The Scraper object that created this one
            self.scraper = scraper

            #The binary data of our thumbnail, downloaded on first access
            self.__thumbnail = None

            #The loaded page of the playlist, requested on first access
            self.__pagesoup = None

        def __int__(self):
            """The playlist as an integer (its ID in base 10)"""
            return self.playlist_id_b10

        def __str__(self):
            """The playlist as a string (its ID in base 36)"""
            return self.playlist_id_b36

        def __eq__(self, other):
            """Determine if this playlist is equal to another.

            Args:
                other (int, str, HTMLPlaylist): Object to compare to.

            Returns:
                Comparison (bool, None): Did it fit the criteria? None if
                    no comparison rule applies to `other`.
            """

            #Check for direct matches first
            if isinstance(other, int):
                return self.playlist_id_b10 == other
            if isinstance(other, str):
                return str(other) == self.playlist_id_b36

            #Check for object attributes to match to
            if hasattr(other, "playlist_id"):
                return self.playlist_id_b10 == utils.ensure_b10(other.playlist_id)

            #Check conversion to integer last, in case another ID or something happens to match
            if hasattr(other, "__int__"):
                return self.playlist_id_b10 == int(other)

        @property
        def _pagesoup(self):
            """The loaded page of the playlist (requested once, then cached)"""
            #Compare against None so a fetched page is never requested twice
            if self.__pagesoup is None:
                self.__pagesoup = self.scraper.soup_request(self.url)

            return self.__pagesoup

        @property
        def thumbnail_url(self):
            """The url of the playlist's thumbnail image"""
            return self._elem.find("img", attrs = {"class" : "thumbnail__image"}).get("src")

        @property
        def thumbnail(self):
            """The playlist thumbnail as a binary string (downloaded once, then cached)"""
            #Compare against None so that even an empty response body is cached
            if self.__thumbnail is None: #We never queried the thumbnail before
                response = requests.get(self.thumbnail_url, timeout = static.Delays.request_timeout)
                #NOTE(review): assert statements are skipped under python -O
                assert response.status_code == 200, "Status code " + str(response.status_code)

                self.__thumbnail = response.content

            return self.__thumbnail

        @property
        def _url_raw(self):
            """The URL of the playlist page (without Rumble base URL)"""
            return self._elem.find("a", attrs = {"class" : "playlist__name link"}).get("href")

        @property
        def url(self):
            """The URL of the playlist page"""
            return static.URI.rumble_base + self._url_raw

        @property
        def playlist_id(self):
            """The numeric ID of the playlist in base 36 (last path segment of its URL)"""
            return self._url_raw.split("/")[-1]

        @property
        def playlist_id_b36(self):
            """The numeric ID of the playlist in base 36"""
            return self.playlist_id

        @property
        def playlist_id_b10(self):
            """The numeric ID of the playlist in base 10"""
            return utils.base_36_to_10(self.playlist_id)

        @property
        def _channel_url_raw(self):
            """The URL of the channel the playlist is under (without base URL)"""
            return self._elem.find("a", attrs = {"class" : "channel__link link"}).get("href")

        @property
        def channel_url(self):
            """The URL of the base user or channel the playlist is under"""
            return static.URI.rumble_base + self._channel_url_raw

        @property
        def is_under_channel(self):
            """Is this playlist under a channel?"""
            return self._channel_url_raw.startswith("/c/")

        @property
        def title(self):
            """The title of the playlist (read from the playlist page)"""
            return self._pagesoup.find("h1", attrs = {"class" : "playlist-control-panel__playlist-name"}).string.strip()

        @property
        def description(self):
            """The description of the playlist (read from the playlist page)"""
            return self._pagesoup.find("div", attrs = {"class" : "playlist-control-panel__description"}).string.strip()

        @property
        def visibility(self):
            """The visibility of the playlist, lowercased (read from the playlist page)"""
            return self._pagesoup.find("span", attrs = {"class" : "playlist-control-panel__visibility-state"}).string.strip().lower()

        @property
        def num_items(self):
            """The number of items in the playlist

            Raises:
                NotImplementedError: Always; this is not implemented yet.
            """
            #This is doable but has not been implemented yet. Raise explicitly
            #rather than silently handing back None.
            raise NotImplementedError("HTMLPlaylist.num_items is not implemented yet")
    

    channel_url property

    The URL of the base user or channel the playlist is under

    description property

    The description of the playlist

    is_under_channel property

    Is this playlist under a channel?

    num_items property

    The number of items in the playlist

    playlist_id property

    The numeric ID of the playlist in base 36

    playlist_id_b10 property

    The numeric ID of the playlist in base 10

    playlist_id_b36 property

    The numeric ID of the playlist in base 36

    thumbnail property

    The playlist thumbnail as a binary string

    thumbnail_url property

    The url of the playlist's thumbnail image

    title property

    The title of the playlist

    url property

    The URL of the playlist page

    visibility property

    The visibility of the playlist

    __eq__(other)

    Determine if this playlist is equal to another.

    Parameters:

    Name Type Description Default
    other (int, str, HTMLPlaylist)

    Object to compare to.

    required

    Returns:

    Name Type Description
    Comparison (bool, None)

    Did it fit the criteria?

    Source code in cocorum/scraping.py
    278
    279
    280
    281
    282
    283
    284
    285
    286
    287
    288
    289
    290
    291
    292
    293
    294
    295
    296
    297
    298
    299
    300
    def __eq__(self, other):
        """Compare this playlist with an ID or a playlist-like object.

        Args:
            other (int, str, HTMLPlaylist): Object to compare to.

        Returns:
            Comparison (bool, None): Whether they match, or None when no
                comparison rule applies to `other`.
        """

        #Stays None if none of the rules below apply
        verdict = None

        if isinstance(other, int):
            #A bare integer is compared with our base 10 ID
            verdict = self.playlist_id_b10 == other
        elif isinstance(other, str):
            #A string is compared with our base 36 ID
            verdict = str(other) == self.playlist_id_b36
        elif hasattr(other, "playlist_id"):
            #Compare by ID, passing the other ID through utils.ensure_b10 first
            verdict = self.playlist_id_b10 == utils.ensure_b10(other.playlist_id)
        elif hasattr(other, "__int__"):
            #Integer conversion is tried last, since an unrelated object's
            #integer form could happen to equal our ID
            verdict = self.playlist_id_b10 == int(other)

        return verdict
    

    __init__(elem, scraper)

    A playlist as obtained from HTML data.

    Parameters:

    Name Type Description Default
    elem Tag

    The playlist class = "thumbnail__grid-item" element.

    required
    scraper Scraper

    The HTML scraper object that spawned us.

    required
    Source code in cocorum/scraping.py
    251
    252
    253
    254
    255
    256
    257
    258
    259
    260
    261
    262
    263
    264
    265
    266
    267
    268
    def __init__(self, elem, scraper):
        """Set up a playlist wrapper around its HTML element.

        Args:
            elem (bs4.Tag): The playlist class = "thumbnail__grid-item" element.
            scraper (Scraper): The HTML scraper object that spawned us.
        """

        super().__init__(elem)

        #Caches start out empty: the loaded playlist page...
        self.__pagesoup = None

        #...and the binary data of our thumbnail
        self.__thumbnail = None

        #Remember which Scraper object created us
        self.scraper = scraper
    

    __int__()

    The playlist as an integer (its ID in base 10)

    Source code in cocorum/scraping.py
    270
    271
    272
    def __int__(self):
        """The playlist as an integer (its ID in base 10)"""
        numeric_id = self.playlist_id_b10
        return numeric_id
    

    __str__()

    The playlist as a string (its ID in base 36)

    Source code in cocorum/scraping.py
    274
    275
    276
    def __str__(self):
        """The playlist as a string (its ID in base 36)"""
        base_36_id = self.playlist_id_b36
        return base_36_id
    

    HTMLUserBadge

    Bases: HTMLObj

    A user badge as extracted from a bs4 HTML element

    Source code in cocorum/scraping.py
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    class HTMLUserBadge(HTMLObj):
        """A user badge as extracted from a bs4 HTML element"""
        def __init__(self, elem):
            """A user badge as extracted from a bs4 HTML element.

            Args:
                elem (bs4.Tag): The badge <img> element
            """

            super().__init__(elem)

            #The slug is the icon's file name up to its last underscore.
            #The slice must be applied to the file name string, not to the
            #list produced by split(), or the slug ends up being a list.
            #Presumably the part after the underscore is a size suffix
            #(e.g. "premium_48.png" -> "premium") - TODO confirm
            filename = elem.attrs["src"].split("/")[-1]
            stem, separator, _ = filename.rpartition("_")

            #With no underscore at all, fall back to the whole file name
            self.slug = stem if separator else filename

            #The binary data of our icon, downloaded on first access
            self.__icon = None

        def __eq__(self, other):
            """Check if this badge is equal to another.

            Args:
                other (str, HTMLUserBadge): Object to compare to.

            Returns:
                Comparison (bool, None): Did it fit the criteria? None if
                    `other` is neither a string nor an object with a slug.
            """

            #Check if the string is either our slug or our label.
            #The label is a plain string (the "title" attribute), not a dict
            if isinstance(other, str):
                return other in (self.slug, self.label)

            #Check if the compared object has the same slug, if it has one
            if hasattr(other, "slug"):
                return self.slug == other.slug

        def __str__(self):
            """The chat user badge in string form (its slug)"""
            return self.slug

        @property
        def label(self):
            """The string label of the badge in whatever language the Service.PHP agent used"""
            return self["title"]

        @property
        def icon_url(self):
            """The URL of the badge's icon"""
            return static.URI.rumble_base + self["src"]

        @property
        def icon(self):
            """The badge's icon as a bytestring (downloaded once, then cached)"""
            #Compare against None so that even an empty response body is cached
            if self.__icon is None: #We never queried the icon before
                #TODO make the timeout configurable
                response = requests.get(self.icon_url, timeout = static.Delays.request_timeout)
                assert response.status_code == 200, "Status code " + str(response.status_code)

                self.__icon = response.content

            return self.__icon
    

    icon property

    The badge's icon as a bytestring

    icon_url property

    The URL of the badge's icon

    label property

    The string label of the badge in whatever language the Service.PHP agent used

    __eq__(other)

    Check if this badge is equal to another.

    Parameters:

    Name Type Description Default
    other (str, HTMLUserBadge)

    Object to compare to.

    required

    Returns:

    Name Type Description
    Comparison (bool, None)

    Did it fit the criteria?

    Source code in cocorum/scraping.py
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    def __eq__(self, other):
        """Check if this badge is equal to another.

        Args:
            other (str, HTMLUserBadge): Object to compare to.

        Returns:
            Comparison (bool, None): Did it fit the criteria? None if
                `other` is neither a string nor an object with a slug.
        """

        #Check if the string is either our slug or our label.
        #The label is a plain string (the "title" attribute), not a dict,
        #so it is compared directly rather than through .values()
        if isinstance(other, str):
            return other in (self.slug, self.label)

        #Check if the compared object has the same slug, if it has one
        if hasattr(other, "slug"):
            return self.slug == other.slug
    

    __init__(elem)

    A user badge as extracted from a bs4 HTML element.

    Parameters:

    Name Type Description Default
    elem Tag

    The badge <img> element

    required
    Source code in cocorum/scraping.py
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    def __init__(self, elem):
        """A user badge as extracted from a bs4 HTML element.

    Args:
        elem (bs4.Tag): The badge <img> element
        """

        super().__init__(elem)

        #The slug is the icon's file name, up to its last underscore.
        #Take the file name out of the path first, then cut the string itself:
        #slicing the result of split() directly would produce a list, not a slug.
        #NOTE(review): assumes icon file names look like <slug>_<suffix> - confirm
        filename = elem.attrs["src"].split("/")[-1]
        stem, underscore, _ = filename.rpartition("_")

        #With no underscore in the file name there is nothing to cut off
        self.slug = stem if underscore else filename

        #The binary data of our icon, nothing downloaded yet
        self.__icon = None
    

    __str__()

    The chat user badge in string form

    Source code in cocorum/scraping.py
    63
    64
    65
    def __str__(self):
        """The chat user badge in string form (its slug)"""
        return self.slug
    

    HTMLVideo

    Bases: HTMLObj

    Video on a user or channel page as extracted from the page's HTML

    Source code in cocorum/scraping.py
    449
    450
    451
    452
    453
    454
    455
    456
    457
    458
    459
    460
    461
    462
    463
    464
    465
    466
    467
    468
    469
    470
    471
    472
    473
    474
    475
    476
    477
    478
    479
    480
    481
    482
    483
    484
    485
    486
    487
    488
    489
    490
    491
    492
    493
    494
    495
    496
    497
    498
    499
    500
    501
    502
    503
    504
    505
    506
    507
    508
    509
    510
    511
    512
    513
    514
    515
    516
    517
    518
    519
    520
    521
    522
    523
    524
    525
    526
    527
    528
    529
    530
    531
    532
    533
    534
    535
    536
    537
    538
    539
    540
    541
    class HTMLVideo(HTMLObj):
        """Video on a user or channel page as extracted from the page's HTML"""
        def __init__(self, elem):
            """Video on a user or channel page as extracted from the page's HTML.

        Args:
            elem (bs4.Tag): The class = "thumbnail__grid-item" video element.
            """

            super().__init__(elem)

            #The binary data of our thumbnail, nothing downloaded yet
            self.__thumbnail = None

        def __int__(self):
            """The video as an integer (its numeric ID)"""
            return self.video_id_b10

        def __str__(self):
            """The video as a string (its ID in base 36)"""
            return self.video_id_b36

        def __eq__(self, other):
            """Determine if this video is equal to another.

        Args:
            other (int, str, HTMLVideo): Object to compare to.

        Returns:
            Comparison (bool, None): Did it fit the criteria?
                None if no comparison criteria applied to other.
            """

            #Check for direct matches first
            if isinstance(other, int):
                return self.video_id_b10 == other
            if isinstance(other, str):
                return str(other) == self.video_id_b36

            #Check for object attributes to match to
            if hasattr(other, "video_id"):
                return self.video_id_b10 == utils.ensure_b10(other.video_id)
            if hasattr(other, "stream_id"):
                return self.video_id_b10 == utils.ensure_b10(other.stream_id)

            #Check conversion to integer last, in case another ID or something happens to match
            if hasattr(other, "__int__"):
                return self.video_id_b10 == int(other)

            #None of the comparison criteria applied
            return None

        @property
        def video_id(self):
            """The numeric ID of the video in base 10"""
            return int(self._elem.get("data-video-id"))

        @property
        def video_id_b10(self):
            """The numeric ID of the video in base 10"""
            return self.video_id

        @property
        def video_id_b36(self):
            """The numeric ID of the video in base 36"""
            return utils.base_10_to_36(self.video_id)

        @property
        def thumbnail_url(self):
            """The URL of the video's thumbnail image"""
            return self._elem.find("img", attrs = {"class" : "thumbnail__image"}).get("src")

        @property
        def thumbnail(self):
            """The video thumbnail as a binary string.

        The thumbnail is downloaded from thumbnail_url the first time it is
        asked for, and the downloaded data is reused on later accesses.

        Raises:
            AssertionError: The thumbnail request did not return status code 200.
            """
            if not self.__thumbnail: #We never queried the thumbnail before
                response = requests.get(self.thumbnail_url, timeout = static.Delays.request_timeout)

                #Raise explicitly instead of using an assert statement, so that the
                #check still runs under python -O and an error body is never cached
                if response.status_code != 200:
                    raise AssertionError("Status code " + str(response.status_code))

                self.__thumbnail = response.content

            return self.__thumbnail

        @property
        def video_url(self):
            """The URL of the video's viewing page"""
            return static.URI.rumble_base + self._elem.find("a", attrs = {"class" : "videostream__link link"}).get("href")

        @property
        def title(self):
            """The title of the video"""
            return self._elem.find("h3", attrs = {"class" : "thumbnail__title"}).get("title")

        @property
        def upload_date(self):
            """The time that the video was uploaded, in seconds since epoch"""
            return utils.parse_timestamp(self._elem.find("time", attrs = {"class" : "videostream__data--subitem videostream__time"}).get("datetime"))
    

    thumbnail property

    The video thumbnail as a binary string

    thumbnail_url property

    The URL of the video's thumbnail image

    title property

    The title of the video

    upload_date property

    The time that the video was uploaded, in seconds since epoch

    video_id property

    The numeric ID of the video in base 10

    video_id_b10 property

    The numeric ID of the video in base 10

    video_id_b36 property

    The numeric ID of the video in base 36

    video_url property

    The URL of the video's viewing page

    __eq__(other)

    Determine if this video is equal to another.

    Parameters:

    Name Type Description Default
    other (int, str, HTMLVideo)

    Object to compare to.

    required

    Returns:

    Name Type Description
    Comparison (bool, None)

    Did it fit the criteria?

    Source code in cocorum/scraping.py
    471
    472
    473
    474
    475
    476
    477
    478
    479
    480
    481
    482
    483
    484
    485
    486
    487
    488
    489
    490
    491
    492
    493
    494
    495
    def __eq__(self, other):
        """Determine if this video is equal to another.
    
    Args:
        other (int, str, HTMLVideo): Object to compare to.
    
    Returns:
        Comparison (bool, None): Did it fit the criteria?
        """
    
        #Check for direct matches first
        if isinstance(other, int):
            return self.video_id_b10 == other
        if isinstance(other, str):
            return str(other) == self.video_id_b36
    
        #Check for object attributes to match to
        if hasattr(other, "video_id"):
            return self.video_id_b10 == utils.ensure_b10(other.video_id)
        if hasattr(other, "stream_id"):
            return self.video_id_b10 == utils.ensure_b10(other.stream_id)
    
        #Check conversion to integer last, in case another ID or something happens to match
        if hasattr(other, "__int__"):
            return self.video_id_b10 == int(other)
    

    __init__(elem)

    Video on a user or channel page as extracted from the page's HTML.

    Parameters:

    Name Type Description Default
    elem Tag

    The class = "thumbnail__grid-item" video element.

    required
    Source code in cocorum/scraping.py
    451
    452
    453
    454
    455
    456
    457
    458
    459
    460
    461
    def __init__(self, elem):
        """Video on a user or channel page as extracted from the page's HTML.

    Args:
        elem (bs4.Tag): The class = "thumbnail__grid-item" video element.
        """

        super().__init__(elem)

        #The binary data of our thumbnail. Starts out empty, and is filled in
        #by the thumbnail property the first time it is accessed.
        self.__thumbnail = None
    

    __int__()

    The video as an integer (its numeric ID)

    Source code in cocorum/scraping.py
    463
    464
    465
    def __int__(self):
        """The video as an integer (its numeric ID in base 10)"""
        return self.video_id_b10
    

    __str__()

    The video as a string (its ID in base 36)

    Source code in cocorum/scraping.py
    467
    468
    469
    def __str__(self):
        """The video as a string (its ID in base 36)"""
        return self.video_id_b36
    

    Scraper

    Scraper for general information

    Source code in cocorum/scraping.py
    543
    544
    545
    546
    547
    548
    549
    550
    551
    552
    553
    554
    555
    556
    557
    558
    559
    560
    561
    562
    563
    564
    565
    566
    567
    568
    569
    570
    571
    572
    573
    574
    575
    576
    577
    578
    579
    580
    581
    582
    583
    584
    585
    586
    587
    588
    589
    590
    591
    592
    593
    594
    595
    596
    597
    598
    599
    600
    601
    602
    603
    604
    605
    606
    607
    608
    609
    610
    611
    612
    613
    614
    615
    616
    617
    618
    619
    620
    621
    622
    623
    624
    625
    626
    627
    628
    629
    630
    631
    632
    633
    634
    635
    636
    637
    638
    639
    640
    641
    642
    643
    644
    645
    646
    647
    648
    649
    650
    651
    652
    653
    654
    655
    656
    657
    658
    659
    660
    661
    662
    663
    664
    665
    666
    667
    668
    669
    670
    671
    672
    673
    674
    675
    676
    677
    678
    679
    680
    681
    682
    683
    684
    685
    686
    687
    688
    689
    690
    691
    692
    693
    694
    695
    696
    697
    698
    699
    700
    class Scraper:
        """Scraper for general information"""
        def __init__(self, servicephp):
            """Scraper for general information.

        Args:
            servicephp (ServicePHP): A ServicePHP instance, for authentication.
            """

            self.servicephp = servicephp

        @property
        def session_cookie(self):
            """The session cookie we are logged in with"""
            return self.servicephp.session_cookie

        @property
        def username(self):
            """Our username"""
            return self.servicephp.username

        def soup_request(self, url: str):
            """Make a GET request to a URL, and return HTML beautiful soup for scraping.

        Args:
            url (str): The URL to query.

        Returns:
            Soup (bs4.BeautifulSoup): The webpage at the URL, logged-in version.

        Raises:
            AssertionError: The request did not return status code 200.
            """

            r = requests.get(
                url,
                cookies = self.session_cookie,
                timeout = static.Delays.request_timeout,
                headers = static.RequestHeaders.user_agent,
                )

            #Raise explicitly instead of using an assert statement, so that the
            #check is not stripped when Python runs with -O
            if r.status_code != 200:
                raise AssertionError(f"Fetching page {url} failed: {r}\n{r.text}")

            return bs4.BeautifulSoup(r.text, features = "html.parser")

        def get_muted_user_record(self, username: str = None):
            """Get the record IDs for mutes.

        Args:
            username (str): Username to find record ID for.
                Defaults to None.

        Returns:
            Record (int, dict, None): Either the single user's mute record ID,
                or a dict of all username:mute record ID pairs.
                None if a username was passed but no mute record was found for it.
            """

            #The page we are on
            pagenum = 1

            #username : record ID
            record_ids = {}

            #While there are more pages
            while True:
                #Get the next page of mutes and search for mute buttons
                soup = self.soup_request(static.URI.mutes_page.format(page = pagenum))
                elems = soup.find_all("button", attrs = {"class" : "unmute_action button-small"})

                #We reached the last page
                if not elems:
                    break

                #Get the record IDs per username from each button
                for e in elems:
                    #Convert once, so the single-user result and the dict values
                    #are both integers, rather than the raw attribute string
                    record_id = int(e.attrs["data-record-id"])

                    #We were searching for a specific username and found it
                    if username and e.attrs["data-username"] == username:
                        return record_id

                    record_ids[e.attrs["data-username"]] = record_id

                #Turn the page
                pagenum += 1

            #Only return record IDs if we weren't searching for a particular one
            if not username:
                return record_ids

            #We were searching for a user and did not find them
            return None

        def get_channels(self, username: str = None):
            """Get all channels under a username.

        Args:
            username (str): The username to get the channels under.
                Defaults to None, use our own username.

        Returns:
            Channels (list): List of HTMLChannel objects.
            """

            if not username:
                username = self.username

            #Get the page of channels and parse for them
            soup = self.soup_request(static.URI.channels_page.format(username = username))
            elems = soup.find_all("div", attrs = {"data-type" : "channel"})
            return [HTMLChannel(e) for e in elems]

        def get_videos(self, username = None, is_channel = False, max_num = None):
            """Get the videos under a user or channel.

        Args:
            username (str): The name of the user or channel to search under.
                Defaults to ourselves.
            is_channel (bool): Is this a channel instead of a userpage?
                Defaults to False.
            max_num (int): The maximum number of videos to retrieve, starting from the newest.
                Defaults to None, return all videos.
                Note, rounded up to the nearest page.

        Returns:
            Videos (list): List of HTMLVideo objects.
            """

            #default to the logged-in username
            if not username:
                username = self.username

            #If this is a channel username, we will need a slightly different URL.
            #Decided by truthiness, so any truthy or falsy flag value works.
            uc = "c" if is_channel else "user"

            #The base userpage URL currently has all their videos / livestreams on it
            url_start = f"{static.URI.rumble_base}/{uc}/{username}"

            #Start the loop with:
            #- no videos found yet
            #- the assumption that there will be new video elements
            #- a current page number of 1
            videos = []
            new_video_elems = True
            pagenum = 1
            while new_video_elems and (not max_num or len(videos) < max_num):
                #Get the next page of videos
                soup = self.soup_request(f"{url_start}?page={pagenum}")

                #Search for video listings, an empty result ends the loop
                new_video_elems = soup.find_all("div", attrs = {"class" : "videostream thumbnail__grid--item"})
                videos.extend(HTMLVideo(e) for e in new_video_elems)

                #Turn the page
                pagenum += 1

            return videos

        def get_playlists(self):
            """Get the playlists under the logged in user"""
            soup = self.soup_request(static.URI.playlists_page)
            return [HTMLPlaylist(elem, self) for elem in soup.find_all("div", attrs = {"class" : "playlist"})]
    

    session_cookie property

    The session cookie we are logged in with

    username property

    Our username

    __init__(servicephp)

    Scraper for general information.

    Parameters:

    Name Type Description Default
    servicephp ServicePHP

    A ServicePHP instance, for authentication.

    required
    Source code in cocorum/scraping.py
    545
    546
    547
    548
    549
    550
    551
    552
    def __init__(self, servicephp):
        """Scraper for general information.

    Args:
        servicephp (ServicePHP): A ServicePHP instance, for authentication.
            Its session_cookie and username are what this scraper's own
            session_cookie and username properties return.
        """

        self.servicephp = servicephp
    

    get_channels(username=None)

    Get all channels under a username.

    Parameters:

    Name Type Description Default
    username str

    The username to get the channels under. Defaults to None, use our own username.

    None

    Returns:

    Name Type Description
    Channels list

    List of HTMLChannel objects.

    Source code in cocorum/scraping.py
    629
    630
    631
    632
    633
    634
    635
    636
    637
    638
    639
    640
    641
    642
    643
    644
    645
    646
    def get_channels(self, username: str = None):
        """List the channels that belong to a user.

    Args:
        username (str): The username to get the channels under.
            Defaults to None, use our own username.

    Returns:
        Channels (list): List of HTMLChannel objects.
        """

        #Fall back to the logged-in user when no username was given
        owner = username or self.username

        #Every channel on the user's channels page is a div marked as one
        page = self.soup_request(static.URI.channels_page.format(username = owner))
        channel_divs = page.find_all("div", attrs = {"data-type" : "channel"})
        return list(map(HTMLChannel, channel_divs))
    

    get_muted_user_record(username=None)

    Get the record IDs for mutes.

    Parameters:

    Name Type Description Default
    username str

    Username to find record ID for. Defaults to None.

    None

    Returns:

    Name Type Description
    Record (int, dict)

    Either the single user's mute record ID, or a dict of all username:mute record ID pairs.

    Source code in cocorum/scraping.py
    584
    585
    586
    587
    588
    589
    590
    591
    592
    593
    594
    595
    596
    597
    598
    599
    600
    601
    602
    603
    604
    605
    606
    607
    608
    609
    610
    611
    612
    613
    614
    615
    616
    617
    618
    619
    620
    621
    622
    623
    624
    625
    626
    627
    def get_muted_user_record(self, username: str = None):
        """Get the record IDs for mutes.

    Args:
        username (str): Username to find record ID for.
            Defaults to None.

    Returns:
        Record (int, dict, None): Either the single user's mute record ID,
            or a dict of all username:mute record ID pairs.
            None if a username was passed but no mute record was found for it.
        """

        #The page we are on
        pagenum = 1

        #username : record ID
        record_ids = {}

        #While there are more pages
        while True:
            #Get the next page of mutes and search for mute buttons
            soup = self.soup_request(static.URI.mutes_page.format(page = pagenum))
            elems = soup.find_all("button", attrs = {"class" : "unmute_action button-small"})

            #We reached the last page
            if not elems:
                break

            #Get the record IDs per username from each button
            for e in elems:
                #Convert once, so the single-user result and the dict values
                #are both integers, rather than the raw attribute string
                record_id = int(e.attrs["data-record-id"])

                #We were searching for a specific username and found it
                if username and e.attrs["data-username"] == username:
                    return record_id

                record_ids[e.attrs["data-username"]] = record_id

            #Turn the page
            pagenum += 1

        #Only return record IDs if we weren't searching for a particular one
        if not username:
            return record_ids

        #We were searching for a user and did not find them
        return None
    

    get_playlists()

    Get the playlists under the logged in user

    Source code in cocorum/scraping.py
    697
    698
    699
    700
    def get_playlists(self):
        """List the playlists of the logged in user, as HTMLPlaylist objects"""
        page = self.soup_request(static.URI.playlists_page)

        #Each playlist on the page is a div of class "playlist"
        playlists = []
        for playlist_div in page.find_all("div", attrs = {"class" : "playlist"}):
            playlists.append(HTMLPlaylist(playlist_div, self))

        return playlists
    

    get_videos(username=None, is_channel=False, max_num=None)

    Get the videos under a user or channel.

    Parameters:

    Name Type Description Default
    username str

    The name of the user or channel to search under. Defaults to ourselves.

    None
    is_channel bool

    Is this a channel instead of a userpage? Defaults to False.

    False
    max_num int

    The maximum number of videos to retrieve, starting from the newest. Defaults to None, return all videos. Note, rounded up to the nearest page.

    None

    Returns:

    Name Type Description
    Videos list

    List of HTMLVideo objects.

    Source code in cocorum/scraping.py
    648
    649
    650
    651
    652
    653
    654
    655
    656
    657
    658
    659
    660
    661
    662
    663
    664
    665
    666
    667
    668
    669
    670
    671
    672
    673
    674
    675
    676
    677
    678
    679
    680
    681
    682
    683
    684
    685
    686
    687
    688
    689
    690
    691
    692
    693
    694
    695
    def get_videos(self, username = None, is_channel = False, max_num = None):
        """Get the videos under a user or channel.

    Args:
        username (str): The name of the user or channel to search under.
            Defaults to ourselves.
        is_channel (bool): Is this a channel instead of a userpage?
            Defaults to False.
        max_num (int): The maximum number of videos to retrieve, starting from the newest.
            Defaults to None, return all videos.
            Note, rounded up to the nearest page.

    Returns:
        Videos (list): List of HTMLVideo objects.
        """

        #default to the logged-in username
        if not username:
            username = self.username

        #If this is a channel username, we will need a slightly different URL.
        #Decided by truthiness, so any truthy or falsy flag value works.
        uc = "c" if is_channel else "user"

        #The base userpage URL currently has all their videos / livestreams on it
        url_start = f"{static.URI.rumble_base}/{uc}/{username}"

        #Start the loop with:
        #- no videos found yet
        #- the assumption that there will be new video elements
        #- a current page number of 1
        videos = []
        new_video_elems = True
        pagenum = 1
        while new_video_elems and (not max_num or len(videos) < max_num):
            #Get the next page of videos
            soup = self.soup_request(f"{url_start}?page={pagenum}")

            #Search for video listings, an empty result ends the loop
            new_video_elems = soup.find_all("div", attrs = {"class" : "videostream thumbnail__grid--item"})
            videos.extend(HTMLVideo(e) for e in new_video_elems)

            #Turn the page
            pagenum += 1

        return videos
    

    soup_request(url)

    Make a GET request to a URL, and return HTML beautiful soup for scraping.

    Parameters:

    Name Type Description Default
    url str

    The URL to query.

    required

    Returns:

    Name Type Description
    Soup BeautifulSoup

    The webpage at the URL, logged-in version.

    Source code in cocorum/scraping.py
    564
    565
    566
    567
    568
    569
    570
    571
    572
    573
    574
    575
    576
    577
    578
    579
    580
    581
    582
    def soup_request(self, url: str):
        """Make a GET request to a URL, and return HTML beautiful soup for scraping.

    Args:
        url (str): The URL to query.

    Returns:
        Soup (bs4.BeautifulSoup): The webpage at the URL, logged-in version.

    Raises:
        AssertionError: The request did not return status code 200.
        """

        r = requests.get(
            url,
            cookies = self.session_cookie,
            timeout = static.Delays.request_timeout,
            headers = static.RequestHeaders.user_agent,
            )

        #Raise explicitly instead of using an assert statement, so that the
        #check is not stripped when Python runs with -O
        if r.status_code != 200:
            raise AssertionError(f"Fetching page {url} failed: {r}\n{r.text}")

        return bs4.BeautifulSoup(r.text, features = "html.parser")
    

    S.D.G.