Skip to content

Commit 141eadd

Browse files
committed
Code cleanup
1 parent 608f373 commit 141eadd

File tree

9 files changed

+250
-45
lines changed

9 files changed

+250
-45
lines changed

.gitignore

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
98+
__pypackages__/
99+
100+
# Celery stuff
101+
celerybeat-schedule
102+
celerybeat.pid
103+
104+
# SageMath parsed files
105+
*.sage.py
106+
107+
# Environments
108+
.env
109+
.venv
110+
env/
111+
venv/
112+
ENV/
113+
env.bak/
114+
venv.bak/
115+
116+
# Spyder project settings
117+
.spyderproject
118+
.spyproject
119+
120+
# Rope project settings
121+
.ropeproject
122+
123+
# mkdocs documentation
124+
/site
125+
126+
# mypy
127+
.mypy_cache/
128+
.dmypy.json
129+
dmypy.json
130+
131+
# Pyre type checker
132+
.pyre/
133+
134+
# pytype static type analyzer
135+
.pytype/
136+
137+
# Cython debug symbols
138+
cython_debug/

README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# YFinance Data Downloader - Docker and Kubernetes Cron Job
2+
3+
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
4+
5+
YFinance minute and daily data downloader, with Dockerfile and Kubernetes pod YAML declaration. The Pod has a restart-on-failure policy and can be run as a cron job on a daily basis. The downloaded data is written to Elasticsearch — which is running in the same Kubernetes (K8s) environment and is connected via a service.
6+
7+
> This project makes use of GitHub Actions to build and publish the Docker image.
8+
9+
## Publishing image to GitHub Packages Manually
10+
docker login docker.pkg.github.com -u USER_NAME -p PERSONAL_ACCESS_TOKEN
11+
12+
docker tag BUILD_IMAGE_ID docker.pkg.github.com/source-nerd/yahoo-data-downloader-docker-elastic/yahoo-data-downloader-docker-elastic:VERSION
13+
14+
docker build -t docker.pkg.github.com/source-nerd/yahoo-data-downloader-docker-elastic/yahoo-data-downloader-docker-elastic:VERSION .
15+
16+
docker push docker.pkg.github.com/source-nerd/yahoo-data-downloader-docker-elastic/yahoo-data-downloader-docker-elastic:VERSION
17+
18+
19+
## Deploying to Kubernetes
20+
1. Initialize the namespace
21+
2. Install the creds - [REF HERE](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/)
22+
```shell
23+
# Example:
24+
kubectl create secret docker-registry regcred \
25+
--docker-server=$DOCKER_REGISTRY_SERVER \
26+
--docker-username=$DOCKER_USER \
27+
--docker-password=$DOCKER_PASSWORD \
28+
--docker-email=$DOCKER_EMAIL
29+
30+
# Actual
31+
kubectl create secret docker-registry regcred --docker-server=docker.pkg.github.com --docker-username=source-nerd --docker-password=PERSONAL_ACCESS_TOKEN --docker-email=EMAIL
32+
```
33+
34+
kubectl --kubeconfig KUBE_CONF_PATH -n NAMESPACE apply -f pod.yaml
35+
36+
37+
## Feedback & Final Thoughts
38+
The code may not be fully optimized, so bug reports, bug fixes, feature requests, pull requests, and general feedback are all welcome. If you like this project, please give it a star.
39+
40+
[![ForTheBadge built-with-love](http://ForTheBadge.com/images/badges/built-with-love.svg)](https://GitHub.com/Naereen/)
41+

__init__.py

Whitespace-only changes.

build.sh

Lines changed: 0 additions & 13 deletions
This file was deleted.

config

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
ES_HOST=elasticsearch.equitian-elk.svc.cluster.local
1+
ES_HOST=elasticsearch.NAMESPACE.svc.cluster.local
22
ES_PORT=9200
3-
ES_USERNAME=elastic
4-
ES_PASSWORD=5qq17MySsmBhN4DFvsJW
3+
ES_USERNAME=ES_USERNAME
4+
ES_PASSWORD=ES_PASSWORD
55

6-
INDEX_1_MIN=us_tickers_1m
7-
INDEX_1_DAY=us_tickers_1d
6+
INDEX_1_MIN=INDEX_1M
7+
INDEX_1_DAY=INDEX_1D
88

9-
STOCKS=AAPL,TSLA,MSFT
9+
STOCKS=AAPL,AMZN,AMD,FB,FISV,GS,GOOGL,INTC,MCD,MSFT,NVDA,PG,TSLA,V

finance_dataset_downloader.zip

-5.03 KB
Binary file not shown.

kubernetes/pod.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
apiVersion: v1
2+
kind: Pod
3+
metadata:
4+
name: data-downloader-pod
5+
namespace: equitian-app
6+
spec:
7+
imagePullSecrets:
8+
- name: regcred
9+
containers:
10+
- name: data-downloader-container
11+
image: docker.pkg.github.com/source-nerd/yahoo-data-downloader-docker-elastic/yahoo-data-downloader-docker-elastic:0.1
12+
env:
13+
- name: ES_HOST
14+
value: ES_HOST
15+
- name: ES_PORT
16+
value: "9200"
17+
- name: ES_USERNAME
18+
value: "elastic"
19+
- name: ES_PASSWORD
20+
value: ES_PASSWORD
21+
- name: INDEX_1_MIN
22+
value: INDEX_1_MIN
23+
- name: INDEX_1_DAY
24+
value: INDEX_1_DAY
25+
- name: STOCKS
26+
value: "AAPL,TSLA,MSFT"
27+
restartPolicy: OnFailure
File renamed without changes.

us_data_downloader.py

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,29 +14,29 @@
1414
STOCKS_LIST = str(os.getenv("STOCKS", "AAPL,TSLA,MSFT")).split(",")
1515
INDEX_1_MIN = os.getenv("INDEX_1_MIN", "us_tickers_1m")
1616
INDEX_1_DAY = os.getenv("INDEX_1_DAY", "us_tickers_1d")
17-
ES_HOST = os.getenv('ES_HOST', "localhost")
18-
ES_PORT = os.getenv('ES_PORT', "9200")
19-
ES_USERNAME = os.getenv('ES_USERNAME', "elastic")
20-
ES_PASSWORD = os.getenv('ES_PASSWORD', "SOME_RANDOM_PASS")
17+
ES_HOST = os.getenv("ES_HOST", "localhost")
18+
ES_PORT = os.getenv("ES_PORT", "9200")
19+
ES_USERNAME = os.getenv("ES_USERNAME", "elastic")
20+
ES_PASSWORD = os.getenv("ES_PASSWORD", "SOME_RANDOM_PASS")
2121

2222

2323
def random_id_generator():
24-
id_size = randrange(5,10)
25-
return ''.join(random.choices(string.ascii_uppercase + string.digits, k=id_size))
24+
id_size = randrange(5, 10)
25+
return "".join(random.choices(string.ascii_uppercase + string.digits, k=id_size))
2626

2727

2828
def doc_generator(df, ticker_name, date_time_field, es_index):
2929
df_iter = df.iterrows()
3030
for df_idx, document in df_iter:
3131
doc_dict = document.to_dict()
3232
doc_dict["ticker"] = ticker_name
33-
doc_id= str(document[date_time_field].value) + "-" + str(random_id_generator())
33+
doc_id = str(document[date_time_field].value) + "-" + str(random_id_generator())
3434
final_doc = {
35-
"_index": es_index,
36-
"_type": "_doc",
37-
"_id" : f"{doc_id}",
38-
"_source": doc_dict,
39-
}
35+
"_index": es_index,
36+
"_type": "_doc",
37+
"_id": f"{doc_id}",
38+
"_source": doc_dict,
39+
}
4040

4141
yield final_doc
4242

@@ -50,17 +50,18 @@ def download_1d_data(ticker, s_date, e_data):
5050
:return:
5151
"""
5252
data_interval = "1d"
53-
data_1_day = yf.download(tickers=ticker,
54-
start=s_date,
55-
end=e_data,
56-
interval=data_interval)
53+
data_1_day = yf.download(
54+
tickers=ticker, start=s_date, end=e_data, interval=data_interval
55+
)
5756
data_1_day.reset_index(level=0, inplace=True)
5857
return data_1_day
5958

6059

6160
def download_min_data(ticker, s_date, e_date):
6261
data_interval = "1m"
63-
LOGGER.info(f"Downloading {ticker} from Yahoo finance - s_data={s_date}, e_date={e_date}")
62+
LOGGER.info(
63+
f"Downloading {ticker} from Yahoo finance - s_data={s_date}, e_date={e_date}"
64+
)
6465
data_1_min = yf.download(
6566
tickers=ticker,
6667
start=s_date,
@@ -74,38 +75,49 @@ def download_min_data(ticker, s_date, e_date):
7475

7576

7677
def get_es_client():
77-
es_client = Elasticsearch([{'host': ES_HOST, 'port': ES_PORT}],
78-
http_auth=('elastic', '5qq17MySsmBhN4DFvsJW'),
79-
http_compress=True)
78+
es_client = Elasticsearch(
79+
[{"host": ES_HOST, "port": ES_PORT}],
80+
http_auth=("elastic", "5qq17MySsmBhN4DFvsJW"),
81+
http_compress=True
82+
)
8083
# es_client = Elasticsearch(['localhost'], port=9200, http_compress=True)
8184
return es_client
8285

8386

8487
def day_downloader_main():
85-
today_date = datetime.today().strftime('%Y-%m-%d')
88+
today_date = datetime.today().strftime("%Y-%m-%d")
8689
LOGGER.info(f"Downloading day's worth of data for date - [{today_date}]")
90+
es_client = get_es_client()
91+
8792
for item in STOCKS_LIST:
8893
res_data = download_1d_data(item, today_date, today_date)
89-
helpers.bulk(get_es_client(), doc_generator(res_data, item, "Date", INDEX_1_DAY))
94+
helpers.bulk(
95+
es_client, doc_generator(res_data, item, "Date", INDEX_1_DAY)
96+
)
9097
LOGGER.info(f"Done downloading data for - [{today_date}]")
9198

9299

93100
def min_downloader_main():
94-
start_date = (datetime.today() + timedelta(days=-2)).strftime('%Y-%m-%d')
95-
end_date = (datetime.today() + timedelta(days=1)).strftime('%Y-%m-%d')
101+
start_date = datetime.today().strftime("%Y-%m-%d")
102+
end_date = (datetime.today() + timedelta(days=1)).strftime("%Y-%m-%d")
103+
es_client = get_es_client()
96104

97105
LOGGER.info(f"Downloading day's worth of data for date - [{start_date}]")
98106
for item in STOCKS_LIST:
99107
res_data = download_min_data(item, start_date, end_date)
100108
LOGGER.info(res_data)
101109
if res_data.shape[0] > 0:
102-
helpers.bulk(get_es_client(), doc_generator(res_data, item, "Datetime", INDEX_1_MIN))
110+
helpers.bulk(
111+
es_client, doc_generator(res_data, item, "Datetime", INDEX_1_MIN)
112+
)
103113
LOGGER.info(f"Done downloading data for - [{start_date}]")
104114

105115

106116
if __name__ == "__main__":
107117
LOGGER.info("Inside main function")
108-
LOGGER.info(f"Global variables are:: {STOCKS_LIST}, {ES_HOST}, {ES_PORT}, {ES_USERNAME}, {ES_PASSWORD}")
118+
LOGGER.info(
119+
f"Global variables are:: {STOCKS_LIST}, {ES_HOST}, {ES_PORT}, {ES_USERNAME}, {ES_PASSWORD}"
120+
)
109121

110122
min_downloader_main()
111123
day_downloader_main()

0 commit comments

Comments
 (0)