I am trying to write a crawler to explore your very cool app. So far I like the constructs: schedule, state, and table are all pretty useful. Here's a bug I encountered:
# Bing API key, sent by process() as the "Ocp-Apim-Subscription-Key" header.
# NOTE(review): Parameter(...) presumably resolves a named platform
# secret/setting -- confirm against the Parameter documentation.
BING_SUBSCRIPTION_KEY = Parameter("BING_SUBSCRIPTION_KEY")
# Bing Image Search v7 endpoint queried by process().
BING_API_IMAGE_ENDPOINT = "https://api.bing.microsoft.com/v7.0/images/search"
# Progress store: read with get_value() and written with set() further down.
# NOTE(review): presumably persisted between scheduled runs -- confirm.
STATE = State()
# Destination table for the rows built by process().
# NOTE(review): "w" is presumably a write mode -- confirm against the Table API.
table = Table("bing_search_spotify_album", "w")
def process(offset, timeout=30):
    """Fetch one page of Bing image results and flatten it into table rows.

    Args:
        offset: Result offset passed to the API; the first row of the page
            is recorded with this offset.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        A ``(batch, update)`` tuple. ``batch`` is a list of row dicts, one
        per result. ``update`` is a dict holding ``last_offset`` (the offset
        of the last row produced, or ``offset`` itself when the page was
        empty), ``last_completed`` (``time.time()`` at return),
        ``last_batch`` (the same list as ``batch``) and ``last_result``
        (the decoded JSON response).

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds ``timeout`` seconds.
    """
    headers = {"Ocp-Apim-Subscription-Key": BING_SUBSCRIPTION_KEY}
    params = {
        "q": "site:https://open.spotify.com/album/",
        "textDecorations": True,
        "textFormat": "HTML",
        # NOTE(review): the Bing Image Search docs appear to cap "count" at
        # 150, so 200 may be clamped or rejected -- confirm.
        "count": 200,
        "offset": offset,
    }
    # Without a timeout a stalled connection would block the run forever.
    resp = requests.get(
        BING_API_IMAGE_ENDPOINT,
        headers=headers,
        params=params,
        timeout=timeout,
    )
    if not resp.ok:
        # Print the error body first; raise_for_status() only reports the code.
        print(resp.text)
    resp.raise_for_status()
    search_results = resp.json()

    # The query string is the same for every row of this page: encode it once.
    query = urllib.parse.urlencode(params)
    last_offset = offset
    batch = []
    # A response without a "value" list is treated as an empty page instead
    # of crashing in enumerate(None).
    for i, result in enumerate(search_results.get("value") or []):
        print(result)
        last_offset = offset + i
        # Guard against results that carry no "thumbnail" object.
        thumbnail = result.get("thumbnail") or {}
        batch.append(
            {
                "offset": last_offset,
                "query": query,
                "thumbnail_url": result.get("thumbnailUrl"),
                "content_url": result.get("contentUrl"),
                "name": result.get("name"),
                "host_url": result.get("hostPageUrl"),
                "width": result.get("width"),
                "height": result.get("height"),
                "thumbnail_width": thumbnail.get("width"),
                "thumbnail_height": thumbnail.get("height"),
            }
        )
    return batch, {
        "last_offset": last_offset,
        "last_completed": time.time(),
        "last_batch": batch,  # NOTE: Removing this fixed the issue
        "last_result": search_results,  # NOTE: Removing this fixed the issue
    }
# Minimum gap between API calls: one request every 2 hours keeps usage
# under 1000 requests per month. Runs that arrive earlier do nothing.
MIN_INTERVAL_SECONDS = 7200

if time.time() > STATE.get_value("last_completed", 0) + MIN_INTERVAL_SECONDS:
    # The saved "last_offset" is the offset of the last row already stored,
    # so resume one past it (reusing it would re-fetch that row and append a
    # duplicate). The -1 default makes the very first run start at 0.
    start_offset = STATE.get_value("last_offset", -1) + 1
    batch, update = process(offset=start_offset)
    if not batch:
        # Nothing was fetched: leave the saved position where it was so the
        # next run retries the same offset instead of skipping ahead.
        update["last_offset"] = start_offset - 1
    STATE.set(update)
    print(update)
    table.append(batch)
{'webSearchUrl': 'https://www.bing.com/images/search?view=detailv2&FORM=OIIRPO&q=site%3ahttps%3a%2f%2fopen.spotify.com%2falbum%2f&id=E9ADCC2340F3E6C6AC86AA063964C7BB35B55AD4&simid=608034758239470266', 'name': 'Afrobeats 2019 by Various Artists on Spotify', 'thumbnailUrl': 'https://tse1.explicit.bing.net/th?id=OIP.IZSQBS103aMDbmK69eFHAwHaHa&pid=Api', 'datePublished': '2020-05-12T11:39:00.0000000Z', 'isFamilyFriendly': False, 'contentUrl': 'https://i.scdn.co/image/ab67616d0000b273902a0a4e4706077a4f08bddc', 'hostPageUrl': 'https://open.spotify.com/album/7gBEu5fhAqpGbEtPBlgumB', 'contentSize': '89555 B', 'encodingFormat': 'jpeg', 'hostPageDisplayUrl': '<b>https://open.spotify.com/album/</b>7gBEu5fhAqpGbEtPBlgumB', 'width': 640, 'height': 640, 'hostPageFavIconUrl': 'https://www.bing.com/th?id=ODF.R52bdbpEO49IDxcvodLRPQ&pid=Api', 'hostPageDomainFriendlyName': 'Spotify', 'hostPageDiscoveredDate': '2019-04-30T00:00:00.0000000Z', 'thumbnail': {'width': 474, 'height': 474}, 'imageInsightsToken': 'ccid_IZSQBS10*cp_43C66ACB7882454BCA658DE55D8F9DC5*mid_E9ADCC2340F3E6C6AC86AA063964C7BB35B55AD4*simid_608034758239470266*thid_OIP.IZSQBS103aMDbmK69eFHAwHaHa', 'insightsMetadata': {'pagesIncludingCount': 3, 'availableSizesCount': 3}, 'imageId': 'E9ADCC2340F3E6C6AC86AA063964C7BB35B55AD4', 'accentColor': 'CBA700'}
{'webSearchUrl': 'https://www.bing.com/images/search?view=detailv2&FORM=OIIRPO&q=site%3ahttps%3a%2f%2fopen.spotify.com%2falbum%2f&id=9E0EB6940AEF800E6583EE319B5A7C3F7B9997C6&simid=608022440268620733', 'name': 'Latino Hits 2021 - Compilation by Various Artists | Spotify', 'thumbnailUrl': 'https://tse1.mm.bing.net/th?id=OIP.m8nKtJZA4hVjac06QG-4MAHaHa&pid=Api', 'datePublished': '2021-06-06T19:41:00.0000000Z', 'isFamilyFriendly': True, 'contentUrl': 'https://i.scdn.co/image/ab67616d0000b273032ff8097e2f7d7a0224cd3e', 'hostPageUrl': 'https://open.spotify.com/album/6lho1kKuau2Y42v1OBFokO', 'contentSize': '111293 B', 'encodingFormat': 'jpeg', 'hostPageDisplayUrl': '<b>https://open.spotify.com/album/</b>6lho1kKuau2Y42v1OBFokO', 'width': 640, 'height': 640, 'hostPageDiscoveredDate': '2021-04-14T00:00:00.0000000Z', 'thumbnail': {'width': 474, 'height': 474}, 'imageInsightsToken': 'ccid_m8nKtJZA*cp_021366D268A21C90781B29F60A4FCAAE*mid_9E0EB6940AEF800E6583EE319B5A7C3F7B9997C6*simid_608022440268620733*thid_OIP.m8nKtJZA4hVjac06QG-4MAHaHa', 'insightsMetadata': {'pagesIncludingCount': 6, 'availableSizesCount': 4}, 'imageId': '9E0EB6940AEF800E6583EE319B5A7C3F7B9997C6', 'accentColor': '27609A'}
{'webSearchUrl': 'https://www.bing.com/images/search?view=detailv2&FORM=OIIRPO&q=site%3ahttps%3a%2f%2fopen.spotify.com%2falbum%2f&id=4C740C11D9EE305300B49FA7D994852786DEAEE2&simid=608001974760133715', 'name': 'PRISM (Deluxe) - Album by Katy Perry | Spotify', 'thumbnailUrl': 'https://tse4.mm.bing.net/th?id=OIP.xuefx77Rebve4ytjkBu5tgHaHa&pid=Api', 'datePublished': '2013-10-23T07:50:00.0000000Z', 'isFamilyFriendly': True, 'contentUrl': 'https://i.scdn.co/image/ab67616d0000b27347f930accd8ac01686401fa2', 'hostPageUrl': 'https://open.spotify.com/album/5MQBzs5YlZlE28mD9yUItn', 'contentSize': '92459 B', 'encodingFormat': 'jpeg', 'hostPageDisplayUrl': '<b>https://open.spotify.com/album/</b>5MQBzs5YlZlE28mD9yUItn', 'width': 640, 'height': 640, 'hostPageFavIconUrl': 'https://www.bing.com/th?id=ODF.R52bdbpEO49IDxcvodLRPQ&pid=Api', 'hostPageDomainFriendlyName': 'Spotify', 'hostPageDiscoveredDate': '2013-10-23T07:50:18.0000000Z', 'thumbnail': {'width': 474, 'height': 474}, 'imageInsightsToken': 'ccid_xuefx77R*cp_38E9D4B8FE7B810AC79F9DA208FE8F2E*mid_4C740C11D9EE305300B49FA7D994852786DEAEE2*simid_608001974760133715*thid_OIP.xuefx77Rebve4ytjkBu5tgHaHa', 'insightsMetadata': {'recipeSourcesCount': 0, 'pagesIncludingCount': 469, 'availableSizesCount': 102}, 'imageId': '4C740C11D9EE305300B49FA7D994852786DEAEE2', 'accentColor': 'B1951A'}
{'webSearchUrl': 'https://www.bing.com/images/search?view=detailv2&FORM=OIIRPO&q=site%3ahttps%3a%2f%2fopen.spotify.com%2falbum%2f&id=000CFEA5E6DA4823C3C7688ADF6B8BEEB68FE086&simid=607996782153506816', 'name': 'Tabata Songs 2020: 20 Sec. Work & 10 Sec. Rest Cycles - Album by Tabata ...', 'thumbnailUrl': 'https://tse1.mm.bing.net/th?id=OIP.WT55rX93sI4F7pp9Xm_IKwHaHa&pid=Api', 'datePublished': '2021-04-21T22:09:00.0000000Z', 'isFamilyFriendly': True, 'contentUrl': 'https://i.scdn.co/image/ab67616d0000b273d551eb09e4c9cee22b96dba4', 'hostPageUrl': 'https://open.spotify.com/album/7sK0MYzPWdIOflCEJmVvWD', 'contentSize': '90176 B', 'encodingFormat': 'jpeg', 'hostPageDisplayUrl': '<b>https://open.spotify.com/album/</b>7sK0MYzPWdIOflCEJmVvWD', 'width': 640, 'height': 640, 'hostPageFavIconUrl': 'https://www.bing.com/th?id=ODF.R52bdbpEO49IDxcvodLRPQ&pid=Api', 'hostPageDomainFriendlyName': 'Spotify', 'hostPageDiscoveredDate': '2020-02-20T00:00:00.0000000Z', 'thumbnail': {'width': 474, 'height': 474}, 'imageInsightsToken': 'ccid_WT55rX93*cp_A89DC841AFCB85C91019D4E896699ECB*mid_000CFEA5E6DA4823C3C7688ADF6B8BEEB68FE086*simid_607996782153506816*thid_OIP.WT55rX93sI4F7pp9Xm!_IKwHaHa', 'insightsMetadata': {'pagesIncludingCount': 2, 'availableSizesCount': 2}, 'imageId': '000CFEA5E6DA4823C3C7688ADF6B8BEEB68FE086', 'accentColor': '666666'}
{'webSearchUrl': 'https://www.bing.com/images/search?view=detailv2&FORM=OIIRPO&q=site%3ahttps%3a%2f%2fopen.spotify.com%2falbum%2f&id=ACD0A1DA61D46EF5E66B674072AF293E78770D6F&simid=608032894229619605', 'name': 'reputation - Album by Taylor Swift | Spotify', 'thumbnailUrl': 'https://tse4.mm.bing.net/th?id=OIP.117skYvCoUrVk9T_s_rXnAAAAA&pid=Api', 'datePublished': '2017-09-22T22:40:00.0000000Z', 'isFamilyFriendly': True, 'contentUrl': 'https://i.scdn.co/image/ab67616d0000b273da5d5aeeabacacc1263c0f4b', 'hostPageUrl': 'https://open.spotify.com/album/6DEjYFkNZh67HP7R9PSZvv', 'contentSize': '132658 B', 'encodingFormat': 'jpeg', 'hostPageDisplayUrl': '<b>https://open.spotify.com/album/</b>6DEjYFkNZh67HP7R9PSZvv', 'width': 640, 'height': 640, 'hostPageFavIconUrl': 'https://www.bing.com/th?id=ODF.R52bdbpEO49IDxcvodLRPQ&pid=Api', 'hostPageDomainFriendlyName': 'Spotify', 'hostPageDiscoveredDate': '2017-09-22T22:40:33.0000000Z', 'thumbnail': {'width': 474, 'height': 474}, 'imageInsightsToken': 'ccid_117skYvC*cp_2EAB6AEA0C662632B737D2899F5BC828*mid_ACD0A1DA61D46EF5E66B674072AF293E78770D6F*simid_608032894229619605*thid_OIP.117skYvCoUrVk9T!_s!_rXnAAAAA', 'insightsMetadata': {'recipeSourcesCount': 0, 'pagesIncludingCount': 1292, 'availableSizesCount': 484}, 'imageId': 'ACD0A1DA61D46EF5E66B674072AF293E78770D6F', 'accentColor': '212121'}
{'webSearchUrl': 'https://www.bing.com/images/search?view=detailv2&FORM=OIIRPO&q=site%3ahttps%3a%2f%2fopen.spotify.com%2falbum%2f&id=A8A9801DBB5910696DFE8BCE3EE7D26818389A88&simid=607992263846672663', 'name': 'Deep House Relax - Compilation by Various Artists | Spotify', 'thumbnailUrl': 'https://tse3.mm.bing.net/th?id=OIP.w1Qr0TdzT_ItWaTIwXzTnAHaHa&pid=Api', 'datePublished': '2017-03-26T00:48:00.0000000Z', 'isFamilyFriendly': True, 'contentUrl': 'https://i.scdn.co/image/ab67616d0000b273e6ab6301659453c587a4bc4b', 'hostPageUrl': 'https://open.spotify.com/album/0rwYBNJwMprvq8I0WV7i8H', 'contentSize': '121039 B', 'encodingFormat': 'jpeg', 'hostPageDisplayUrl': '<b>https://open.spotify.com/album/</b>0rwYBNJwMprvq8I0WV7i8H', 'width': 640, 'height': 640, 'hostPageFavIconUrl': 'https://www.bing.com/th?id=ODF.R52bdbpEO49IDxcvodLRPQ&pid=Api', 'hostPageDomainFriendlyName': 'Spotify', 'hostPageDiscoveredDate': '2017-03-26T00:48:00.0000000Z', 'thumbnail': {'width': 474, 'height': 474}, 'imageInsightsToken': 'ccid_w1Qr0Tdz*cp_E05B645E8EF4F7049FAB390F4C107CCA*mid_A8A9801DBB5910696DFE8BCE3EE7D26818389A88*simid_607992263846672663*thid_OIP.w1Qr0TdzT!_ItWaTIwXzTnAHaHa', 'insightsMetadata': {'pagesIncludingCount': 3, 'availableSizesCount': 3}, 'imageId': 'A8A9801DBB5910696DFE8BCE3EE7D26818389A88', 'accentColor': '083880'}
{'webSearchUrl': 'https://www.bing.com/images/search?view=detailv2&FORM=OIIRPO&q=site%3ahttps%3a%2f%2fopen.spotify.com%2falbum%2f&id=7E8DD6DEA59E47C79A46EF16F68316CB0A866C4C&simid=608027379491675102', 'name': 'Love Songs by Beth on Spotify', 'thumbnailUrl': 'https://tse1.explicit.bing.net/th?id=OIP.GooE4oMjM85ZSoh76cjNswHaHa&pid=Api', 'datePublished': '2021-03-11T16:10:00.0000000Z', 'isFamilyFriendly': False, 'contentUrl': 'https://i.scdn.co/image/ab67616d0000b2739e9b35c23db7b4c4250acd5e', 'hostPageUrl': 'https://open.spotify.com/album/63SJYNPV82aAeMU1iuMvU8', 'contentSize': '47911 B', 'encodingFormat': 'jpeg', 'hostPageDisplayUrl': '<b>https://open.spotify.com/album/</b>63SJYNPV82aAeMU1iuMvU8', 'width': 640, 'height': 640, 'hostPageFavIconUrl': 'https://www.bing.com/th?id=ODF.R52bdbpEO49IDxcvodLRPQ&pid=Api', 'hostPageDomainFriendlyName': 'Spotify', 'hostPageDiscoveredDate': '2017-01-10T00:00:00.0000000Z', 'thumbnail': {'width': 474, 'height': 474}, 'imageInsightsToken': 'ccid_GooE4oMj*cp_E35A2EF6C028AE2C1209FF1E92D0BF9F*mid_7E8DD6DEA59E47C79A46EF16F68316CB0A866C4C*simid_608027379491675102*thid_OIP.GooE4oMjM85ZSoh76cjNswHaHa', 'insightsMetadata': {'pagesIncludingCount': 3, 'availableSizesCount': 2}, 'imageId': '7E8DD6DEA59E47C79A46EF16F68316CB0A866C4C', 'accentColor': 'C24509'}
{'webSearchUrl': 'https://www.bing.com/images/search?view=detailv2&FORM=OIIRPO&q=site%3ahttps%3a%2f%2fopen.spotify.com%2falbum%2f&id=76321ACC2F37D8963CBA91CE622E81111C4E1CC5&simid=608034363109737202', 'name': 'Top Radio Hits - Album by Top 40 Hits, The Cover Crew, Dance Hits 2017 ...', 'thumbnailUrl': 'https://tse4.mm.bing.net/th?id=OIP.1--fEfj5kRx_6NxI9mi2qwHaHa&pid=Api', 'datePublished': '2021-04-25T18:01:00.0000000Z', 'isFamilyFriendly': True, 'contentUrl': 'https://i.scdn.co/image/ab67616d0000b273237353afe0bd5986e3911684', 'hostPageUrl': 'https://open.spotify.com/album/6hLT8YyHHWrEfU0sTvAQ6F', 'contentSize': '155717 B', 'encodingFormat': 'jpeg', 'hostPageDisplayUrl': '<b>https://open.spotify.com/album/</b>6hLT8YyHHWrEfU0sTvAQ6F', 'width': 640, 'height': 640, 'hostPageFavIconUrl': 'https://www.bing.com/th?id=ODF.R52bdbpEO49IDxcvodLRPQ&pid=Api', 'hostPageDomainFriendlyName': 'Spotify', 'hostPageDiscoveredDate': '2018-04-03T00:00:00.0000000Z', 'thumbnail': {'width': 474, 'height': 474}, 'imageInsightsToken': 'ccid_1++fEfj5*cp_8DC0DA3C8EC4E93B0E343553C8FB9B18*mid_76321ACC2F37D8963CBA91CE622E81111C4E1CC5*simid_608034363109737202*thid_OIP.1--fEfj5kRx!_6NxI9mi2qwHaHa', 'insightsMetadata': {'pagesIncludingCount': 6, 'availableSizesCount': 5}, 'imageId': '76321ACC2F37D8963CBA91CE622E81111C4E1CC5', 'accentColor': 'C90267'}