github-actions committed on
Commit
737c9a0
·
1 Parent(s): d0c8642

Update dataset

Browse files
Files changed (1) hide show
  1. update_data.py +15 -12
update_data.py CHANGED
@@ -1,6 +1,6 @@
1
  import gspread
2
  import logging
3
- import os
4
  import pandas as pd
5
  import requests
6
  import subprocess
@@ -13,10 +13,12 @@ def read_existing_data(file_path):
13
  """Reads existing data from a CSV file to compare with newly fetched data."""
14
  if os.path.exists(file_path):
15
  logging.info("Existing data file found. Reading data...")
16
- return pd.read_csv(file_path)
 
 
17
  else:
18
  logging.info("No existing data file found.")
19
- return pd.DataFrame() # Return an empty DataFrame if file does not exist
20
 
21
  def download_sheet(sheet_id, range_name):
22
  """Downloads data from Google Sheets and returns a DataFrame."""
@@ -40,8 +42,9 @@ def download_sheet(sheet_id, range_name):
40
  data = worksheet.get_all_records()
41
 
42
  # Convert to DataFrame
43
- df = pd.DataFrame(data)
44
- return df
 
45
  except gspread.exceptions.APIError as e:
46
  logging.error(f"API error occurred: {e}")
47
  raise
@@ -65,7 +68,7 @@ def git_commit_push():
65
  try:
66
  subprocess.run(['git', 'config', '--global', 'user.name', 'github-actions'], check=True)
67
  subprocess.run(['git', 'config', '--global', 'user.email', '[email protected]'], check=True)
68
- subprocess.run(['git', 'add', 'omoku_data.csv'], check=True)
69
  subprocess.run(['git', 'commit', '-m', 'Update dataset'], check=True)
70
  subprocess.run(['git', 'push'], check=True)
71
  logging.info(f"Data updated successfully.")
@@ -82,15 +85,15 @@ if __name__ == "__main__":
82
  RANGE_NAME = 'data'
83
  FILE_PATH = 'omoku_data.csv'
84
 
85
- new_data = download_sheet(SHEET_ID, RANGE_NAME)
86
  existing_data = read_existing_data(FILE_PATH)
 
87
 
88
- if not new_data.empty and new_data.equals(existing_data):
89
  logging.info("No new data to update.")
90
- exit(0)
91
-
92
- save_to_csv(new_data, FILE_PATH)
93
- git_commit_push()
94
  except Exception as e:
95
  logging.critical(f"An unexpected error occurred: {e}")
96
  raise
 
1
  import gspread
2
  import logging
3
+ import os, sys
4
  import pandas as pd
5
  import requests
6
  import subprocess
 
13
  """Reads existing data from a CSV file to compare with newly fetched data."""
14
  if os.path.exists(file_path):
15
  logging.info("Existing data file found. Reading data...")
16
+ e_df = pd.read_csv(file_path)
17
+ logging.info(f"Data read successfully: {e_df.shape[0]} rows and {e_df.shape[1]} columns.")
18
+ return e_df
19
  else:
20
  logging.info("No existing data file found.")
21
+ return pd.DataFrame()
22
 
23
  def download_sheet(sheet_id, range_name):
24
  """Downloads data from Google Sheets and returns a DataFrame."""
 
42
  data = worksheet.get_all_records()
43
 
44
  # Convert to DataFrame
45
+ n_df = pd.DataFrame(data)
46
+ logging.info(f"Data downloaded successfully: {n_df.shape[0]} rows and {n_df.shape[1]} columns.")
47
+ return n_df
48
  except gspread.exceptions.APIError as e:
49
  logging.error(f"API error occurred: {e}")
50
  raise
 
68
  try:
69
  subprocess.run(['git', 'config', '--global', 'user.name', 'github-actions'], check=True)
70
  subprocess.run(['git', 'config', '--global', 'user.email', '[email protected]'], check=True)
71
+ subprocess.run(['git', 'add', '.'], check=True)
72
  subprocess.run(['git', 'commit', '-m', 'Update dataset'], check=True)
73
  subprocess.run(['git', 'push'], check=True)
74
  logging.info(f"Data updated successfully.")
 
85
  RANGE_NAME = 'data'
86
  FILE_PATH = 'omoku_data.csv'
87
 
 
88
  existing_data = read_existing_data(FILE_PATH)
89
+ new_data = download_sheet(SHEET_ID, RANGE_NAME)
90
 
91
+ if new_data.equals(existing_data):
92
  logging.info("No new data to update.")
93
+ sys.exit(0)
94
+ else:
95
+ save_to_csv(new_data, FILE_PATH)
96
+ git_commit_push()
97
  except Exception as e:
98
  logging.critical(f"An unexpected error occurred: {e}")
99
  raise