georgiad committed on
Commit f0bc6d4 · 1 Parent(s): 8f98272

Switched to single feed
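In short: the notebook no longer loops over a dictionary of per-source nitter/rssbox RSS feeds. It now reads a single aggregated feed (https://www.rssground.com/p/Newsletter), keeps entries newer than the selected age, pulls the outbound links out of each entry's HTML summary, and then filters those links against the feed_keywords URL patterns. The following is a minimal sketch of that flow, written outside the notebook with the feed URL and a shortened pattern list taken from the diff below; the notebook itself uses pytz with the Europe/Stockholm timezone and a filter_strings helper, whereas this sketch uses UTC and applies the substring check to each link's URL field directly.

import re
from datetime import datetime, timedelta, timezone

import feedparser
from bs4 import BeautifulSoup

# Values taken from the new notebook cells; the full pattern list has eleven entries.
FEED = "https://www.rssground.com/p/Newsletter"
FEED_KEYWORDS = ["a16z.com/", "sequoiacap.com/article", "bvp.com/atlas/"]

def read_single_feed(feed_url, max_age_days=7):
    """Collect outbound links from one aggregated feed, keeping only recent entries."""
    # The notebook computes this cutoff with pytz and Europe/Stockholm; UTC is used here for self-containment.
    cutoff = datetime.now(timezone.utc) - timedelta(days=max_age_days)
    links = []
    for entry in feedparser.parse(feed_url).entries:
        # The aggregated feed publishes RFC 822 dates such as 'Tue, 02 May 2023 10:00:00 +0000'.
        published = datetime.strptime(entry.published, "%a, %d %b %Y %H:%M:%S %z")
        if published <= cutoff:
            continue
        # The originating account is embedded in the entry title as '@handle'.
        match = re.search(r"@([^ ]+)", entry.title)
        feed_name = match.group(1) if match else ""
        soup = BeautifulSoup(entry.summary, "html.parser")
        links.extend(
            {"url": a.get("href"), "updated": published, "feed_name": feed_name}
            for a in soup.find_all("a")
        )
    return links

def keep_relevant(links, patterns=FEED_KEYWORDS):
    """Keep only links whose URL contains one of the source patterns."""
    return [item for item in links if item["url"] and any(p in item["url"] for p in patterns)]

Compared with the previous version, there is no longer a per-source feedparser call or a kw_for mapping from feed name to keyword; the source filter is applied once to the collected links.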

newsletter_production_mercury.ipynb CHANGED
@@ -1,493 +1,444 @@
1
  {
2
- "cells": [
3
- {
4
- "cell_type": "raw",
5
- "metadata": {},
6
- "source": [
7
- "---\n",
8
- "title: Newsletter Helper\n",
9
- "description: Follow the instructions on screen\n",
10
- "show-code: false\n",
11
- "params:\n",
12
- " feed_names:\n",
13
- " label: Sources\n",
14
- " input: select\n",
15
- " value: ['a16z.com/',\n",
16
- " 'sequoiacap.com/article',\n",
17
- " 'zettavp.com/playbook/',\n",
18
- " 'atomico.com/insights/',\n",
19
- " 'nt-z.ro/',\n",
20
- " 'accel.com/noteworthy',\n",
21
- " 'felicis.com/',\n",
22
- " 'scalevp.com/blog/',\n",
23
- " 'redpoint.com/start/',\n",
24
- " '83north.com/',\n",
25
- " 'bvp.com/atlas/']\n",
26
- " choices: ['a16z.com/',\n",
27
- " 'sequoiacap.com/article',\n",
28
- " 'zettavp.com/playbook/',\n",
29
- " 'atomico.com/insights/',\n",
30
- " 'nt-z.ro/',\n",
31
- " 'accel.com/noteworthy',\n",
32
- " 'felicis.com/',\n",
33
- " 'scalevp.com/blog/',\n",
34
- " 'redpoint.com/start/',\n",
35
- " '83north.com/',\n",
36
- " 'bvp.com/atlas/']\n",
37
- " multi: True\n",
38
- " feed_age:\n",
39
- " label: How old?\n",
40
- " input: select\n",
41
- " value: '7 days'\n",
42
- " choices: ['7 days', '14 days', '30 days']\n",
43
- " multi: False\n",
44
- "---"
45
- ]
46
- },
47
- {
48
- "cell_type": "code",
49
- "execution_count": 1,
50
- "metadata": {
51
- "id": "pfJ5NpqjCT1U"
52
- },
53
- "outputs": [],
54
- "source": [
55
- "feed_names = ['a16z.com/',\n",
56
- " 'sequoiacap.com/article',\n",
57
- " 'zettavp.com/playbook/',\n",
58
- " 'atomico.com/insights/',\n",
59
- " 'nt-z.ro/',\n",
60
- " 'accel.com/noteworthy',\n",
61
- " 'felicis.com/',\n",
62
- " 'scalevp.com/blog/',\n",
63
- " 'redpoint.com/start/',\n",
64
- " '83north.com/',\n",
65
- " 'bvp.com/atlas/']\n",
66
- "feed_age = '28 days'"
67
- ]
68
- },
69
- {
70
- "cell_type": "code",
71
- "execution_count": 2,
72
- "metadata": {},
73
- "outputs": [
74
  {
75
- "data": {
76
- "text/plain": [
77
- "\",\\n 'sequoiacap.com/article': 'https://nitter.kavin.rocks/sequoia/rss',\\n 'zettavp.com/playbook/': 'https://nitter.kavin.rocks/ZettaVentures/rss',\\n 'atomico.com/insights/': 'https://nitter.kavin.rocks/atomico/rss',\\n 'nt-z.ro/': 'https://nitter.kavin.rocks/Breakthrough/rss',\\n 'accel.com/noteworthy': 'https://nitter.kavin.rocks/Accel/rss',\\n 'felicis.com/': 'https://nitter.kavin.rocks/felicis/rss',\\n 'scalevp.com/blog/': 'https://nitter.kavin.rocks/scalevp/rss',\\n 'redpoint.com/start/': 'https://nitter.kavin.rocks/Redpoint/rss',\\n '83north.com/': 'https://nitter.kavin.rocks/83NorthVC/rss',\\n 'bvp.com/atlas/': 'https://nitter.kavin.rocks/BessemerVP/rss'}\\n\""
78
  ]
79
- },
80
- "execution_count": 2,
81
- "metadata": {},
82
- "output_type": "execute_result"
83
  }
84
- ],
85
- "source": [
86
- "keywords = [\"Electro mobility\",\n",
87
- " \"Batteries \",\n",
88
- " \"Battery Management systems\",\n",
89
- " \"Lidars\",\n",
90
- " \"RADARS\",\n",
91
- " \"AI\",\n",
92
- " \"Industrial AI\",\n",
93
- " \"Transportation\",\n",
94
- " \"Mobility\",\n",
95
- " \"Climate Tech\",\n",
96
- " \"Sustainable grid\",\n",
97
- " \"Sensor fusion\",\n",
98
- " \"Computer vision\",\n",
99
- " \"Data Analytics\",\n",
100
- " \"Digital Twins\",\n",
101
- " \"Automotive Cybersecurity\",\n",
102
- " \"Logistics\",\n",
103
- " \"Ports\",\n",
104
- " \"Construction sites\",\n",
105
- " \"Mines\",\n",
106
- " \"Quarries\",\n",
107
- " \"Trucks\",\n",
108
- " \"Power train\",\n",
109
- " \"Software defined vehicle\"]\n",
110
- "'''\n",
111
- "feed_dict = {'a16z.com/': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/64844802/a16z',\n",
112
- " 'sequoiacap.com/article': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/24775410/sequoia',\n",
113
- " 'zettavp.com/playbook/': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/3952740738/ZettaVentures',\n",
114
- " 'atomico.com/insights/': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/44579473/atomico',\n",
115
- " 'nt-z.ro/': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/793585473956425728/Breakthrough',\n",
116
- " 'accel.com/noteworthy': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/33846451/Accel',\n",
117
- " 'felicis.com/': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/382162909/felicis',\n",
118
- " 'scalevp.com/blog/': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/17904605/scalevp',\n",
119
- " 'redpoint.com/start/': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/104286459/Redpoint',\n",
120
- " '83north.com/': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/2963095189/83NorthVC',\n",
121
- " 'bvp.com/atlas/': 'https://rssbox.us-west-2.elasticbeanstalk.com/twitter/59166197/BessemerVP'}\n",
122
- "'''\n",
123
- "\n",
124
- "feed_dict = {'a16z.com/': 'https://nitter.kavin.rocks/a16z/rss'}\n",
125
- "''',\n",
126
- " 'sequoiacap.com/article': 'https://nitter.kavin.rocks/sequoia/rss',\n",
127
- " 'zettavp.com/playbook/': 'https://nitter.kavin.rocks/ZettaVentures/rss',\n",
128
- " 'atomico.com/insights/': 'https://nitter.kavin.rocks/atomico/rss',\n",
129
- " 'nt-z.ro/': 'https://nitter.kavin.rocks/Breakthrough/rss',\n",
130
- " 'accel.com/noteworthy': 'https://nitter.kavin.rocks/Accel/rss',\n",
131
- " 'felicis.com/': 'https://nitter.kavin.rocks/felicis/rss',\n",
132
- " 'scalevp.com/blog/': 'https://nitter.kavin.rocks/scalevp/rss',\n",
133
- " 'redpoint.com/start/': 'https://nitter.kavin.rocks/Redpoint/rss',\n",
134
- " '83north.com/': 'https://nitter.kavin.rocks/83NorthVC/rss',\n",
135
- " 'bvp.com/atlas/': 'https://nitter.kavin.rocks/BessemerVP/rss'}\n",
136
- "'''"
137
- ]
138
- },
139
- {
140
- "cell_type": "code",
141
- "execution_count": 3,
142
- "metadata": {
143
- "id": "Ig5nSCbI6yuL"
144
- },
145
- "outputs": [],
146
- "source": [
147
- "from keybert import KeyBERT\n",
148
- "import pandas as pd\n",
149
- "from keyphrase_vectorizers import KeyphraseCountVectorizer\n",
150
- "from sentence_transformers import SentenceTransformer\n",
151
- "import numpy as np\n",
152
- "from sklearn.metrics.pairwise import cosine_similarity\n",
153
- "\n",
154
- "import feedparser\n",
155
- "import requests\n",
156
- "from bs4 import BeautifulSoup\n",
157
- "from openpyxl import Workbook\n",
158
- "import time\n",
159
- "import pickle\n",
160
- "import os\n",
161
- "from tqdm import tqdm\n",
162
- "from concurrent.futures import ThreadPoolExecutor\n",
163
- "#from functools import lru_cache\n",
164
- "\n",
165
- "# Define function to extract keywords from the HTML body using the YAKE keyword extractor\n",
166
- "def extract_keyphrases(text, kw_model, vectorizer, embedding_model):\n",
167
- " kph = [kw for kw, score in kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), stop_words='english', vectorizer=vectorizer, use_mmr=True)]\n",
168
- " keyphrase_embeddings = embedding_model.encode(kph)\n",
169
- " return kph, keyphrase_embeddings\n",
170
- "\n",
171
- "def get_similarity_scores(keyword_embeddings, keyphrase_embeddings):\n",
172
- " similarity_scores = cosine_similarity(keyphrase_embeddings, keyword_embeddings).max(axis=1).astype(str).tolist()\n",
173
- " similarity_max = cosine_similarity(keyphrase_embeddings, keyword_embeddings).flatten().max().astype(str)\n",
174
- " return similarity_scores, similarity_max\n",
175
- "\n",
176
- "# Define function to get the redirected URL (if any) for a given URL\n",
177
- "def get_redirected_url(url_record, headers, expected_codes=(301, 302, 303, 307), timeout=60):\n",
178
- " try:\n",
179
- " res = requests.head(url_record['url'], headers=headers, timeout=timeout)\n",
180
- " if res.status_code in expected_codes:\n",
181
- " url_record['url'] = res.headers['location']\n",
182
- " elif res.status_code == 200:\n",
183
- " url_record['url'] = url_record['url']\n",
184
- " else:\n",
185
- " print(f\"Retrieving {url_record['url']} failed: Expected {expected_codes}, but received {res.status_code}: {res.reason}\")\n",
186
- " except requests.exceptions.Timeout:\n",
187
- " print(f\"\\nRequest timed out for {url_record['url']}\")\n",
188
- " return url_record\n",
189
- " except:\n",
190
- " return url_record\n",
191
- "\n",
192
- " return url_record\n",
193
- "\n",
194
- "# Define function to get the HTML body of a given URL\n",
195
- "def get_html_body(url, headers):\n",
196
- " try:\n",
197
- " response = requests.get(url, headers=headers, timeout=10)\n",
198
- " html = response.content\n",
199
- " soup = BeautifulSoup(html, 'html.parser')\n",
200
- " return soup.body.get_text()\n",
201
- " except:\n",
202
- " return ''\n",
203
- "\n",
204
- "# Define function to write data to the Excel sheet\n",
205
- "def write_data_to_excel(url_dict, filename):\n",
206
- " # Create a new Excel workbook and worksheet\n",
207
- " workbook = Workbook()\n",
208
- " worksheet = workbook.active\n",
209
- " worksheet.title = 'RSS Feeds'\n",
210
- "\n",
211
- " # Write the headers for the Excel sheet\n",
212
- " worksheet.cell(row=1, column=1, value='Feed Name')\n",
213
- " worksheet.cell(row=1, column=2, value='URL')\n",
214
- " worksheet.cell(row=1, column=3, value='Updated')\n",
215
- " worksheet.cell(row=1, column=4, value='Keyphrases')\n",
216
- " worksheet.cell(row=1, column=5, value='Similarity to supplied keywords')\n",
217
- " worksheet.cell(row=1, column=6, value='Similarity (max)')\n",
218
- " worksheet.cell(row=1, column=7, value='HTML Body')\n",
219
- "\n",
220
- " # Loop over the unique URLs and write them to the Excel sheet\n",
221
- " row_num = 2\n",
222
- " for url, data in url_dict.items():\n",
223
- " worksheet.cell(row=row_num, column=1, value=data['feed_name'])\n",
224
- " worksheet.cell(row=row_num, column=2, value=url)\n",
225
- " worksheet.cell(row=row_num, column=3, value=data['updated'])\n",
226
- " worksheet.cell(row=row_num, column=4, value=data['keyphrases'])\n",
227
- " worksheet.cell(row=row_num, column=5, value=data['similarity'])\n",
228
- " worksheet.cell(row=row_num, column=6, value=data['similarity_max'])\n",
229
- " worksheet.cell(row=row_num, column=7, value=data['html_body'])\n",
230
- "\n",
231
- " row_num += 1\n",
232
- "\n",
233
- " worksheet.freeze_panes = 'A2'\n",
234
- "\n",
235
- " # Set the number format for column A, except the first row\n",
236
- " for row in worksheet.iter_rows(min_row=2, min_col=3, max_col=3):\n",
237
- " for cell in row:\n",
238
- " cell.number_format = 'mm/dd/yyyy hh:mm:ss'\n",
239
- "\n",
240
- " # Save the Excel workbook\n",
241
- " workbook.save(filename)\n",
242
- "\n",
243
- " # Print confirmation message\n",
244
- " #print(f'RSS output written to excel sheet: {filename}')\n",
245
- "\n",
246
- "def remaining_entries_from_dict(filename, dictionary):\n",
247
- " pickle_data = {}\n",
248
- " if os.path.exists(filename):\n",
249
- " with open(filename, 'rb') as f:\n",
250
- " pickle_data = pickle.load(f)\n",
251
- " return list(set(dictionary.keys()) - set(pickle_data.keys()))\n",
252
- "\n",
253
- "def process_url(url):\n",
254
- " global url_dict\n",
255
- " \n",
256
- " #body = get_html_body(url, headers)\n",
257
- " #kph,keyphrase_embeddings = extract_keyphrases(body, kw_model, vectorizer, embedding_model)\n",
258
- " #similarity, similarity_max = get_similarity_scores(keyword_embeddings, keyphrase_embeddings)\n",
259
- "\n",
260
- " #url_dict[url]['keyphrases'] = ', '.join(kph)\n",
261
- " #url_dict[url]['similarity'] = ', '.join(similarity)\n",
262
- " #url_dict[url]['similarity_max'] = similarity_max\n",
263
- " #url_dict[url]['html_body'] = body\n",
264
- " \n",
265
- " url_dict[url]['keyphrases'] = ''\n",
266
- " url_dict[url]['similarity'] = ''\n",
267
- " url_dict[url]['similarity_max'] = ''\n",
268
- " url_dict[url]['html_body'] = \"Skipping this part, to speed up the process\"\n",
269
- "\n",
270
- " # Store temporary results to disk\n",
271
- " #with open(\"retrieved_urls.pkl\", 'wb') as f:\n",
272
- " # pickle.dump(url_dict, f)"
273
- ]
274
- },
275
- {
276
- "cell_type": "code",
277
- "execution_count": null,
278
- "metadata": {
279
  "colab": {
280
- "base_uri": "https://localhost:8080/"
281
  },
282
- "id": "5cHnJQDSDy1Q",
283
- "outputId": "53774e96-c10f-4d36-8864-2f9e1accbb87"
284
- },
285
- "outputs": [],
286
- "source": [
287
- "import pprint\n",
288
- "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
289
- "from tqdm import tqdm\n",
290
- "from datetime import datetime\n",
291
- "import nltk\n",
292
- "\n",
293
- "\n",
294
- "# Start the profiling timer\n",
295
- "start_time = time.time()\n",
296
- "\n",
297
- "# Initialize the SentenceTransformer model\n",
298
- "kw_model = KeyBERT('distilbert-base-nli-mean-tokens')\n",
299
- "vectorizer = KeyphraseCountVectorizer()\n",
300
- "embedding_model = SentenceTransformer('distilbert-base-nli-mean-tokens')\n",
301
- "nltk.download('stopwords', quiet=True)\n",
302
- "\n",
303
- "# Initialize variables\n",
304
- "headers = {\n",
305
- " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'\n",
306
- "}\n",
307
- "keyword_embeddings = embedding_model.encode(keywords) # Encode keywords using the embedding model\n",
308
- "\n",
309
- "def read_feeds(feed_list, how_old):\n",
310
- " global urls\n",
311
- " import sys\n",
312
- " import io\n",
313
- " from datetime import datetime, timedelta\n",
314
- "\n",
315
- " old_stdout = sys.stdout\n",
316
- " sys.stdout = mystdout = io.StringIO()\n",
317
- " print(\"A small change\")\n",
318
- " # Loop over the RSS feeds and keywords\n",
319
- " urls_temp = []\n",
320
- " urls = []\n",
321
- " feed_item_age_minimum = datetime.now() - timedelta(days=int(how_old.split()[0]))\n",
322
- "\n",
323
- " for keyword, rss_feed in tqdm(feed_list.items(), total=len(feed_list.items()), file=sys.stdout, bar_format='Reading feeds: {n}/{total} ({percentage:.0f}%), time elapsed: {elapsed}'):\n",
324
- " print(rss_feed)\n",
325
- " feed_name = rss_feed.split('/')[-1]\n",
326
- " feed = feedparser.parse(rss_feed)\n",
327
- " for entry in tqdm(feed.entries, total=len(feed.entries), file=sys.stdout, bar_format='\\tReading feed entries: {n}/{total} ({percentage:.0f}%), time elapsed: {elapsed}'):\n",
328
- " if 'content' in entry:\n",
329
- " for content in entry.content:\n",
330
- " soup = BeautifulSoup(content.get('value'), 'html.parser')\n",
331
- " updated = datetime.strptime(entry.updated, '%Y-%m-%dT%H:%M:%SZ')\n",
332
- " if updated > feed_item_age_minimum:\n",
333
- " urls_temp.extend([{'url': link.get('href'), 'updated': updated, 'feed_name': feed_name} for link in soup.find_all('a')])\n",
334
- "\n",
335
- " with ThreadPoolExecutor(max_workers=4) as executor:\n",
336
- " futures = [executor.submit(get_redirected_url, url, headers) for url in urls_temp]\n",
337
- " for future in tqdm(as_completed(futures), total=len(futures), file=sys.stdout, bar_format='Checking URLs: {n}/{total} ({percentage:.0f}%), time elapsed: {elapsed}'):\n",
338
- " urls.append(future.result())\n",
339
- "\n",
340
- " sys.stdout = old_stdout\n",
341
- " return mystdout.getvalue()\n",
342
- "\n",
343
- "def read_process_urls():\n",
344
- " import sys\n",
345
- " import io\n",
346
- " from datetime import datetime, timedelta\n",
347
- " old_stdout = sys.stdout\n",
348
- " sys.stdout = mystdout = io.StringIO()\n",
349
- "\n",
350
- " global urls\n",
351
- " global url_dict\n",
352
- " kw_for = {rss_feed.split('/')[-1]: keyword for keyword, rss_feed in feed_dict.items()}\n",
353
- "\n",
354
- " #print(f\"Urls: {urls}\")\n",
355
- " url_dict = {}\n",
356
- " for item in urls:\n",
357
- " feed_name = item['feed_name']\n",
358
- " updated = item['updated']\n",
359
- " url = item['url']\n",
360
- "\n",
361
- " import pprint\n",
362
- " pprint.pprint(url)\n",
363
- " pprint.pprint(kw_for[feed_name])\n",
364
- " if kw_for[feed_name] in url:\n",
365
- " if url not in url_dict.keys():\n",
366
- " url_dict[url] = {'updated': updated, 'feed_name': feed_name}\n",
367
- " else:\n",
368
- " if url_dict[url]['updated'] > updated:\n",
369
- " url_dict[url]['updated'] = updated\n",
370
- "\n",
371
- " start_parallel_loop_time = time.time()\n",
372
- " results = []\n",
373
- " with ThreadPoolExecutor(max_workers=4) as executor:\n",
374
- " futures = [executor.submit(process_url, url) for url in url_dict.keys()]#remaining_entries_from_dict(\"retrieved_urls.pkl\", url_dict)]\n",
375
- " for future in tqdm(as_completed(futures), total=len(futures), file=sys.stdout, bar_format='Reading URLs: {n}/{total} ({percentage:.0f}%), time elapsed: {elapsed}'):\n",
376
- " results.append(future.result())\n",
377
- " #print(f\"Parallel URL processing: {time.time() - start_parallel_loop_time:.3f} seconds\")\n",
378
- " print(f\"Total links processed: {len(url_dict.keys())}\")\n",
379
- "\n",
380
- " #with open(\"retrieved_urls.pkl\", 'wb') as f:\n",
381
- " # pickle.dump(url_dict, f)\n",
382
- "\n",
383
- " # Print the time taken by the main function\n",
384
- " print(f\"Total time needed: {time.time() - start_time:.3f} seconds\")\n",
385
- "\n",
386
- " # Write dataset to the Excel sheet\n",
387
- " write_data_to_excel(url_dict, 'newsletter_results.xlsx')\n",
388
- "\n",
389
- " sys.stdout = old_stdout\n",
390
- " return mystdout.getvalue()"
391
- ]
392
- },
393
- {
394
- "cell_type": "code",
395
- "execution_count": null,
396
- "metadata": {
397
- "colab": {
398
- "base_uri": "https://localhost:8080/",
399
- "height": 622,
400
- "referenced_widgets": [
401
- "55c86791cb9a491c8bb6217af108fa16",
402
- "63d6badce4dc45a0a4af8660c9a96b5d",
403
- "733bbc713dea4917bcdde749cafbd99c",
404
- "975eed9f527e441c99ff509b5cc53bc1",
405
- "3ef1bf00fdec434ebfe32715777368ff",
406
- "cf14a6e4b11840bdae7166460ea8485f",
407
- "1991f11a746b4c4ba005ae4eee9ae0ad",
408
- "4835d51282334101ac67c821bd5d628c",
409
- "106ead77e8064e65844b810e3f5d574d",
410
- "6537a1a32eab44b08900a48a6f005dd7",
411
- "d66e01f3991c4ec69e3e2f9d204c3898",
412
- "aa70b01aa66147f592652a30b890c888",
413
- "ff0115a2cae843c1b164e4b7d179ff24",
414
- "c52d63ae33c14fe9befe4debb4f7ab21",
415
- "eeb751b9682144ca90b1d467a6f525aa",
416
- "7347a703f72c4d459217afb92c3c05b1",
417
- "583f99b9cfd945a4afaede6cfa0961db",
418
- "c22b236a142144d3a0fdae9163a795d3",
419
- "93719585652c49cc8c32455577423532",
420
- "b30da31352da4278b8180b7f2c4080fa",
421
- "7ed878a7f4df4e52989d8e16c542842b",
422
- "2e7d9d0f096a4b5592ab3116766c9158",
423
- "03448be097a84dd6bc0c5064166b22d0",
424
- "938b6fc3ee6a4cbd95196a6e5ab111d9",
425
- "5728cc854a2a4c6fa037b78889dedb20",
426
- "1d8c3034625f4b598a1875b74f7efdfd",
427
- "0cc60859ebd7478fad11cb51113da820",
428
- "1bb65fbc92144355a0977d88d6c3bf50",
429
- "81ca37de802f427791e4d0f740735d0a",
430
- "a27f0a049a5b4d869d1297afac81099f",
431
- "bfa325bddbec4f98bd60e436461db0af",
432
- "27a8d7b324ba43798be54e7685eb345a",
433
- "8db6fe180d234c7d9d60cadcfadc5bc7",
434
- "8e4869a380aa4f5faa05335c16d6e972",
435
- "997639d0dcc8461a89fb9bcdcfffacbe"
436
- ]
437
  },
438
- "id": "FNR1jfm-jsgb",
439
- "outputId": "aae04a22-81e1-4e97-cc47-bbcc84503558"
440
- },
441
- "outputs": [],
442
- "source": [
443
- "from ipywidgets import HTML\n",
444
- "\n",
445
- "read_feeds(feed_dict, feed_age)\n",
446
- "display(HTML(f\"Total links examined: {len(urls)}\"))\n",
447
- "\n",
448
- "read_process_urls()\n",
449
- "display(HTML(f\"Relevant links found: {len(url_dict.keys())}\"))\n",
450
- "display(HTML(f\"------------------------------\"))\n",
451
- "\n",
452
- "for url in url_dict.keys():\n",
453
- " #print(url)\n",
454
- " display(HTML(f\"{url}\"))\n"
455
- ]
456
- },
457
- {
458
- "cell_type": "code",
459
- "execution_count": null,
460
- "metadata": {},
461
- "outputs": [],
462
- "source": [
463
- "urls"
464
- ]
465
- }
466
- ],
467
- "metadata": {
468
- "accelerator": "GPU",
469
- "colab": {
470
- "provenance": []
471
- },
472
- "gpuClass": "standard",
473
- "kernelspec": {
474
- "display_name": "Python 3 (ipykernel)",
475
- "language": "python",
476
- "name": "python3"
477
  },
478
- "language_info": {
479
- "codemirror_mode": {
480
- "name": "ipython",
481
- "version": 3
482
- },
483
- "file_extension": ".py",
484
- "mimetype": "text/x-python",
485
- "name": "python",
486
- "nbconvert_exporter": "python",
487
- "pygments_lexer": "ipython3",
488
- "version": "3.9.7"
489
- }
490
- },
491
- "nbformat": 4,
492
- "nbformat_minor": 1
493
- }
 
1
  {
2
+ "cells": [
3
+ {
4
+ "cell_type": "raw",
5
+ "metadata": {
6
+ "id": "uIxcPJeuGGAF"
7
+ },
8
+ "source": [
9
+ "---\n",
10
+ "title: Newsletter Helper\n",
11
+ "description: Follow the instructions on screen\n",
12
+ "show-code: false\n",
13
+ "params:\n",
14
+ " feed_keywords:\n",
15
+ " label: Sources\n",
16
+ " input: select\n",
17
+ " value: ['a16z.com/',\n",
18
+ " 'sequoiacap.com/article',\n",
19
+ " 'zettavp.com/playbook/',\n",
20
+ " 'atomico.com/insights/',\n",
21
+ " 'nt-z.ro/',\n",
22
+ " 'accel.com/noteworthy',\n",
23
+ " 'felicis.com/',\n",
24
+ " 'scalevp.com/blog/',\n",
25
+ " 'redpoint.com/start/',\n",
26
+ " '83north.com/',\n",
27
+ " 'bvp.com/atlas/']\n",
28
+ " choices: ['a16z.com/',\n",
29
+ " 'sequoiacap.com/article',\n",
30
+ " 'zettavp.com/playbook/',\n",
31
+ " 'atomico.com/insights/',\n",
32
+ " 'nt-z.ro/',\n",
33
+ " 'accel.com/noteworthy',\n",
34
+ " 'felicis.com/',\n",
35
+ " 'scalevp.com/blog/',\n",
36
+ " 'redpoint.com/start/',\n",
37
+ " '83north.com/',\n",
38
+ " 'bvp.com/atlas/']\n",
39
+ " multi: True\n",
40
+ " feed_age:\n",
41
+ " label: How old?\n",
42
+ " input: select\n",
43
+ " value: '7 days'\n",
44
+ " choices: ['7 days', '14 days', '30 days']\n",
45
+ " multi: False\n",
46
+ "---"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 2,
52
+ "metadata": {
53
+ "id": "pfJ5NpqjCT1U"
54
+ },
55
+ "outputs": [],
56
+ "source": [
57
+ "feed_keywords = ['a16z.com/',\n",
58
+ " 'sequoiacap.com/article',\n",
59
+ " 'zettavp.com/playbook/',\n",
60
+ " 'atomico.com/insights/',\n",
61
+ " 'nt-z.ro/',\n",
62
+ " 'accel.com/noteworthy',\n",
63
+ " 'felicis.com/',\n",
64
+ " 'scalevp.com/blog/',\n",
65
+ " 'redpoint.com/start/',\n",
66
+ " '83north.com/',\n",
67
+ " 'bvp.com/atlas/']\n",
68
+ "feed_age = '28 days'"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": 3,
74
+ "metadata": {
75
+ "id": "mEOS4asyGGAI"
76
+ },
77
+ "outputs": [],
78
+ "source": [
79
+ "keywords = [\"Electro mobility\",\n",
80
+ " \"Batteries \",\n",
81
+ " \"Battery Management systems\",\n",
82
+ " \"Lidars\",\n",
83
+ " \"RADARS\",\n",
84
+ " \"AI\",\n",
85
+ " \"Industrial AI\",\n",
86
+ " \"Transportation\",\n",
87
+ " \"Mobility\",\n",
88
+ " \"Climate Tech\",\n",
89
+ " \"Sustainable grid\",\n",
90
+ " \"Sensor fusion\",\n",
91
+ " \"Computer vision\",\n",
92
+ " \"Data Analytics\",\n",
93
+ " \"Digital Twins\",\n",
94
+ " \"Automotive Cybersecurity\",\n",
95
+ " \"Logistics\",\n",
96
+ " \"Ports\",\n",
97
+ " \"Construction sites\",\n",
98
+ " \"Mines\",\n",
99
+ " \"Quarries\",\n",
100
+ " \"Trucks\",\n",
101
+ " \"Power train\",\n",
102
+ " \"Software defined vehicle\"]\n",
103
+ "\n",
104
+ "feed = \"https://www.rssground.com/p/Newsletter\""
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "source": [
110
+ "#!pip install keybert\n",
111
+ "#!pip install feedparser\n",
112
+ "#!pip install keyphrase_vectorizers\n",
113
+ "#!pip install sentence-transformers"
114
+ ],
115
+ "metadata": {
116
+ "id": "WMswc6FCGR9T"
117
+ },
118
+ "execution_count": 11,
119
+ "outputs": []
120
+ },
121
  {
122
+ "cell_type": "code",
123
+ "execution_count": 5,
124
+ "metadata": {
125
+ "id": "Ig5nSCbI6yuL"
126
+ },
127
+ "outputs": [],
128
+ "source": [
129
+ "from keybert import KeyBERT\n",
130
+ "import pandas as pd\n",
131
+ "from keyphrase_vectorizers import KeyphraseCountVectorizer\n",
132
+ "from sentence_transformers import SentenceTransformer\n",
133
+ "import numpy as np\n",
134
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
135
+ "\n",
136
+ "import feedparser\n",
137
+ "import requests\n",
138
+ "from bs4 import BeautifulSoup\n",
139
+ "from openpyxl import Workbook\n",
140
+ "import time\n",
141
+ "import pickle\n",
142
+ "import os\n",
143
+ "from tqdm import tqdm\n",
144
+ "from concurrent.futures import ThreadPoolExecutor\n",
145
+ "#from functools import lru_cache\n",
146
+ "\n",
147
+ "# Define function to extract keywords from the HTML body using the YAKE keyword extractor\n",
148
+ "def extract_keyphrases(text, kw_model, vectorizer, embedding_model):\n",
149
+ " kph = [kw for kw, score in kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), stop_words='english', vectorizer=vectorizer, use_mmr=True)]\n",
150
+ " keyphrase_embeddings = embedding_model.encode(kph)\n",
151
+ " return kph, keyphrase_embeddings\n",
152
+ "\n",
153
+ "def get_similarity_scores(keyword_embeddings, keyphrase_embeddings):\n",
154
+ " similarity_scores = cosine_similarity(keyphrase_embeddings, keyword_embeddings).max(axis=1).astype(str).tolist()\n",
155
+ " similarity_max = cosine_similarity(keyphrase_embeddings, keyword_embeddings).flatten().max().astype(str)\n",
156
+ " return similarity_scores, similarity_max\n",
157
+ "\n",
158
+ "# Define function to get the redirected URL (if any) for a given URL\n",
159
+ "def get_redirected_url(url_record, headers, expected_codes=(301, 302, 303, 307), timeout=60):\n",
160
+ " try:\n",
161
+ " res = requests.head(url_record['url'], headers=headers, timeout=timeout)\n",
162
+ " if res.status_code in expected_codes:\n",
163
+ " url_record['url'] = res.headers['location']\n",
164
+ " elif res.status_code == 200:\n",
165
+ " url_record['url'] = url_record['url']\n",
166
+ " else:\n",
167
+ " print(f\"Retrieving {url_record['url']} failed: Expected {expected_codes}, but received {res.status_code}: {res.reason}\")\n",
168
+ " except requests.exceptions.Timeout:\n",
169
+ " print(f\"\\nRequest timed out for {url_record['url']}\")\n",
170
+ " return url_record\n",
171
+ " except:\n",
172
+ " return url_record\n",
173
+ "\n",
174
+ " return url_record\n",
175
+ "\n",
176
+ "# Define function to get the HTML body of a given URL\n",
177
+ "def get_html_body(url, headers):\n",
178
+ " try:\n",
179
+ " response = requests.get(url, headers=headers, timeout=10)\n",
180
+ " html = response.content\n",
181
+ " soup = BeautifulSoup(html, 'html.parser')\n",
182
+ " return soup.body.get_text()\n",
183
+ " except:\n",
184
+ " return ''\n",
185
+ "\n",
186
+ "# Define function to write data to the Excel sheet\n",
187
+ "def write_data_to_excel(url_dict, filename):\n",
188
+ " # Create a new Excel workbook and worksheet\n",
189
+ " workbook = Workbook()\n",
190
+ " worksheet = workbook.active\n",
191
+ " worksheet.title = 'RSS Feeds'\n",
192
+ "\n",
193
+ " # Write the headers for the Excel sheet\n",
194
+ " worksheet.cell(row=1, column=1, value='Feed Name')\n",
195
+ " worksheet.cell(row=1, column=2, value='URL')\n",
196
+ " worksheet.cell(row=1, column=3, value='Updated')\n",
197
+ " worksheet.cell(row=1, column=4, value='Keyphrases')\n",
198
+ " worksheet.cell(row=1, column=5, value='Similarity to supplied keywords')\n",
199
+ " worksheet.cell(row=1, column=6, value='Similarity (max)')\n",
200
+ " worksheet.cell(row=1, column=7, value='HTML Body')\n",
201
+ "\n",
202
+ " # Loop over the unique URLs and write them to the Excel sheet\n",
203
+ " row_num = 2\n",
204
+ " for url, data in url_dict.items():\n",
205
+ " worksheet.cell(row=row_num, column=1, value=data['feed_name'])\n",
206
+ " worksheet.cell(row=row_num, column=2, value=url)\n",
207
+ " worksheet.cell(row=row_num, column=3, value=data['updated'])\n",
208
+ " worksheet.cell(row=row_num, column=4, value=data['keyphrases'])\n",
209
+ " worksheet.cell(row=row_num, column=5, value=data['similarity'])\n",
210
+ " worksheet.cell(row=row_num, column=6, value=data['similarity_max'])\n",
211
+ " worksheet.cell(row=row_num, column=7, value=data['html_body'])\n",
212
+ "\n",
213
+ " row_num += 1\n",
214
+ "\n",
215
+ " worksheet.freeze_panes = 'A2'\n",
216
+ "\n",
217
+ " # Set the number format for column A, except the first row\n",
218
+ " for row in worksheet.iter_rows(min_row=2, min_col=3, max_col=3):\n",
219
+ " for cell in row:\n",
220
+ " cell.number_format = 'mm/dd/yyyy hh:mm:ss'\n",
221
+ "\n",
222
+ " # Save the Excel workbook\n",
223
+ " workbook.save(filename)\n",
224
+ "\n",
225
+ " # Print confirmation message\n",
226
+ " #print(f'RSS output written to excel sheet: {filename}')\n",
227
+ "\n",
228
+ "def remaining_entries_from_dict(filename, dictionary):\n",
229
+ " pickle_data = {}\n",
230
+ " if os.path.exists(filename):\n",
231
+ " with open(filename, 'rb') as f:\n",
232
+ " pickle_data = pickle.load(f)\n",
233
+ " return list(set(dictionary.keys()) - set(pickle_data.keys()))\n",
234
+ "\n",
235
+ "def process_url(url):\n",
236
+ " global url_dict\n",
237
+ " \n",
238
+ " #body = get_html_body(url, headers)\n",
239
+ " #kph,keyphrase_embeddings = extract_keyphrases(body, kw_model, vectorizer, embedding_model)\n",
240
+ " #similarity, similarity_max = get_similarity_scores(keyword_embeddings, keyphrase_embeddings)\n",
241
+ "\n",
242
+ " #url_dict[url]['keyphrases'] = ', '.join(kph)\n",
243
+ " #url_dict[url]['similarity'] = ', '.join(similarity)\n",
244
+ " #url_dict[url]['similarity_max'] = similarity_max\n",
245
+ " #url_dict[url]['html_body'] = body\n",
246
+ " \n",
247
+ " url_dict[url]['keyphrases'] = ''\n",
248
+ " url_dict[url]['similarity'] = ''\n",
249
+ " url_dict[url]['similarity_max'] = ''\n",
250
+ " url_dict[url]['html_body'] = \"Skipping this part, to speed up the process\"\n",
251
+ "\n",
252
+ " # Store temporary results to disk\n",
253
+ " #with open(\"retrieved_urls.pkl\", 'wb') as f:\n",
254
+ " # pickle.dump(url_dict, f)"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "code",
259
+ "execution_count": null,
260
+ "metadata": {
261
+ "id": "5cHnJQDSDy1Q"
262
+ },
263
+ "outputs": [],
264
+ "source": [
265
+ "import pprint\n",
266
+ "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
267
+ "from tqdm import tqdm\n",
268
+ "from datetime import datetime\n",
269
+ "import nltk\n",
270
+ "\n",
271
+ "\n",
272
+ "# Start the profiling timer\n",
273
+ "#start_time = time.time()\n",
274
+ "\n",
275
+ "# Initialize the SentenceTransformer model\n",
276
+ "kw_model = KeyBERT('distilbert-base-nli-mean-tokens')\n",
277
+ "vectorizer = KeyphraseCountVectorizer()\n",
278
+ "embedding_model = SentenceTransformer('distilbert-base-nli-mean-tokens')\n",
279
+ "nltk.download('stopwords', quiet=True)\n",
280
+ "\n",
281
+ "# Initialize variables\n",
282
+ "headers = {\n",
283
+ " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'\n",
284
+ "}\n",
285
+ "keyword_embeddings = embedding_model.encode(keywords) # Encode keywords using the embedding model\n",
286
+ "\n",
287
+ "def filter_strings(lst1, lst2):\n",
288
+ " \"\"\"\n",
289
+ " Filters the list `lst2` and returns only the elements that have any of the elements of `lst1` as a substring.\n",
290
+ "\n",
291
+ " Args:\n",
292
+ " lst1 (list): The list of substrings to match against.\n",
293
+ " lst2 (list): The list of strings to filter.\n",
294
+ "\n",
295
+ " Returns:\n",
296
+ " list: A new list containing the filtered elements from `lst2`.\n",
297
+ "\n",
298
+ " Examples:\n",
299
+ " >>> lst1 = ['apple', 'banana', 'orange']\n",
300
+ " >>> lst2 = ['apple pie', 'banana bread', 'cherry pie', 'orange juice']\n",
301
+ " >>> filter_strings(lst1, lst2)\n",
302
+ " ['apple pie', 'banana bread', 'orange juice']\n",
303
+ " \"\"\"\n",
304
+ " filtered_lst2 = [s for s in lst2 if any(substring in s for substring in lst1)]\n",
305
+ " return filtered_lst2\n",
306
+ "\n",
307
+ "\n",
308
+ "def read_feeds(rss_feed, how_old):\n",
309
+ " global urls\n",
310
+ " import sys\n",
311
+ " import io\n",
312
+ " import re\n",
313
+ " from datetime import datetime, timedelta\n",
314
+ " import pytz\n",
315
+ "\n",
316
+ " old_stdout = sys.stdout\n",
317
+ " sys.stdout = mystdout = io.StringIO()\n",
318
+ "\n",
319
+ " # Loop over the RSS feeds and keywords\n",
320
+ " urls_temp = []\n",
321
+ " urls = []\n",
322
+ "\n",
323
+ " # Get the desired timezone\n",
324
+ " timezone = pytz.timezone('Europe/Stockholm') # Replace 'Your_Timezone_Here' with the desired timezone\n",
325
+ "\n",
326
+ " # Calculate the age with timezone\n",
327
+ " feed_item_age_minimum = datetime.now(timezone) - timedelta(days=int(how_old.split()[0]))\n",
328
+ "\n",
329
+ " feed = feedparser.parse(rss_feed)\n",
330
+ " for entry in tqdm(feed.entries, total=len(feed.entries), file=sys.stdout, bar_format='\\tReading feed entries: {n}/{total} ({percentage:.0f}%), time elapsed: {elapsed}'):\n",
331
+ " soup = BeautifulSoup(entry.summary, 'html.parser')\n",
332
+ " updated = datetime.strptime(entry.published, '%a, %d %b %Y %H:%M:%S %z')\n",
333
+ " if re.search(r'@([^ ]+)', entry.title):\n",
334
+ " feed_name = re.search(r'@([^ ]+)', entry.title).group(1)\n",
335
+ " else:\n",
336
+ " feed_name = ''\n",
337
+ " if updated > feed_item_age_minimum:\n",
338
+ " urls_temp.extend([{'url': link.get('href'), 'updated': updated, 'feed_name': feed_name} for link in soup.find_all('a')])\n",
339
+ "\n",
340
+ " with ThreadPoolExecutor(max_workers=4) as executor:\n",
341
+ " futures = [executor.submit(get_redirected_url, url, headers) for url in urls_temp]\n",
342
+ " for future in tqdm(as_completed(futures), total=len(futures), file=sys.stdout, bar_format='Checking URLs: {n}/{total} ({percentage:.0f}%), time elapsed: {elapsed}'):\n",
343
+ " urls.append(future.result())\n",
344
+ "\n",
345
+ " sys.stdout = old_stdout\n",
346
+ " return mystdout.getvalue()\n",
347
+ "\n",
348
+ "def read_process_urls():\n",
349
+ " import sys\n",
350
+ " import io\n",
351
+ " from datetime import datetime, timedelta\n",
352
+ " old_stdout = sys.stdout\n",
353
+ " sys.stdout = mystdout = io.StringIO()\n",
354
+ "\n",
355
+ " global urls\n",
356
+ " global url_dict\n",
357
+ "\n",
358
+ " #print(f\"Urls: {urls}\")\n",
359
+ " url_dict = {}\n",
360
+ " for item in filter_strings(feed_keywords, urls):\n",
361
+ " feed_name = item['feed_name']\n",
362
+ " updated = item['updated']\n",
363
+ " url = item['url']\n",
364
+ "\n",
365
+ " import pprint\n",
366
+ " pprint.pprint(url)\n",
367
+ " if url not in url_dict.keys():\n",
368
+ " url_dict[url] = {'updated': updated, 'feed_name': feed_name}\n",
369
+ " else:\n",
370
+ " if url_dict[url]['updated'] > updated:\n",
371
+ " url_dict[url]['updated'] = updated\n",
372
+ "\n",
373
+ " start_parallel_loop_time = time.time()\n",
374
+ " results = []\n",
375
+ " with ThreadPoolExecutor(max_workers=4) as executor:\n",
376
+ " futures = [executor.submit(process_url, url) for url in url_dict.keys()]#remaining_entries_from_dict(\"retrieved_urls.pkl\", url_dict)]\n",
377
+ " for future in tqdm(as_completed(futures), total=len(futures), file=sys.stdout, bar_format='Reading URLs: {n}/{total} ({percentage:.0f}%), time elapsed: {elapsed}'):\n",
378
+ " results.append(future.result())\n",
379
+ " #print(f\"Parallel URL processing: {time.time() - start_parallel_loop_time:.3f} seconds\")\n",
380
+ " print(f\"Total links processed: {len(url_dict.keys())}\")\n",
381
+ "\n",
382
+ " #with open(\"retrieved_urls.pkl\", 'wb') as f:\n",
383
+ " # pickle.dump(url_dict, f)\n",
384
+ "\n",
385
+ " # Print the time taken by the main function\n",
386
+ " #print(f\"Total time needed: {time.time() - start_time:.3f} seconds\")\n",
387
+ "\n",
388
+ " # Write dataset to the Excel sheet\n",
389
+ " write_data_to_excel(url_dict, 'newsletter_results.xlsx')\n",
390
+ "\n",
391
+ " sys.stdout = old_stdout\n",
392
+ " return mystdout.getvalue()"
393
+ ]
394
+ },
395
+ {
396
+ "cell_type": "code",
397
+ "execution_count": null,
398
+ "metadata": {
399
+ "id": "FNR1jfm-jsgb"
400
+ },
401
+ "outputs": [],
402
+ "source": [
403
+ "from ipywidgets import HTML\n",
404
+ "\n",
405
+ "read_feeds(feed, feed_age)\n",
406
+ "display(HTML(f\"Total links examined: {len(urls)}\"))\n",
407
+ "\n",
408
+ "read_process_urls()\n",
409
+ "display(HTML(f\"Relevant links found: {len(url_dict.keys())}\"))\n",
410
+ "display(HTML(f\"------------------------------\"))\n",
411
+ "\n",
412
+ "for url in url_dict.keys():\n",
413
+ " #print(url)\n",
414
+ " display(HTML(f\"{url}\"))\n"
415
  ]
416
  }
417
+ ],
418
+ "metadata": {
419
+ "accelerator": "GPU",
420
  "colab": {
421
+ "provenance": []
422
  },
423
+ "gpuClass": "standard",
424
+ "kernelspec": {
425
+ "display_name": "Python 3 (ipykernel)",
426
+ "language": "python",
427
+ "name": "python3"
428
  },
429
+ "language_info": {
430
+ "codemirror_mode": {
431
+ "name": "ipython",
432
+ "version": 3
433
+ },
434
+ "file_extension": ".py",
435
+ "mimetype": "text/x-python",
436
+ "name": "python",
437
+ "nbconvert_exporter": "python",
438
+ "pygments_lexer": "ipython3",
439
+ "version": "3.9.7"
440
+ }
441
  },
442
+ "nbformat": 4,
443
+ "nbformat_minor": 0
444
+ }
vc_newsletter_results.xlsx DELETED
Binary file (5.51 kB)
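Unchanged by this commit, but useful context when reading the diff: the links gathered from the feed are shortened redirect URLs, and the notebook expands them concurrently with HEAD requests (get_redirected_url fanned out through a ThreadPoolExecutor). Below is a self-contained sketch of that fan-out pattern; it uses requests' built-in redirect following rather than the notebook's manual Location handling, and the example URLs in the usage note are hypothetical.

from concurrent.futures import ThreadPoolExecutor, as_completed

import requests

HEADERS = {"User-Agent": "Mozilla/5.0"}

def resolve(url, timeout=10):
    """Follow the redirect chain with a HEAD request and return the final URL."""
    try:
        res = requests.head(url, headers=HEADERS, timeout=timeout, allow_redirects=True)
        return res.url
    except requests.RequestException:
        return url  # keep the original URL if the lookup fails

def resolve_all(urls, workers=4):
    """Resolve many URLs in parallel, mirroring the notebook's max_workers=4 thread pool."""
    resolved = []
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(resolve, u) for u in urls]
        for future in as_completed(futures):
            resolved.append(future.result())
    return resolved

# Hypothetical usage:
# resolve_all(["https://t.co/abc123", "https://a16z.com/some-post/"])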