-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpyedgar.conf
65 lines (57 loc) · 2.81 KB
/
pyedgar.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
[Paths]
; Root directories, under which filings/cached tarballs will be found
; There is no % interpolation, so a literal % (e.g. the strftime codes in
; FEED_CACHE_PATH_FORMAT below) is written as-is.
; CACHE_FEED indicates whether the feed should be cached/searched locally
CACHE_FEED = True
; FILING_ROOT is the root of the extracted filings
FILING_ROOT=./
; FEED_CACHE_ROOT is the root of the compressed daily feed files from EDGAR
FEED_CACHE_ROOT=./
; CACHE_INDEX indicates whether the index should be cached/searched locally
CACHE_INDEX = True
; INDEX_ROOT is the root of the extracted index tab-delimited files
INDEX_ROOT=./edgar_data
; INDEX_CACHE_ROOT is the root of the compressed index files cached from EDGAR
INDEX_CACHE_ROOT=./edgar_data
; FILING_PATH_FORMAT is the string to be .format-ed with the CIK and ACCESSION of the filing
; Don't put injection attacks here. That would be bad.
; Maximum length is 250 characters.
; Format string is formatted as an f-string (see docs), therefore slicing is possible.
; Available variables are:
; cik (int)
; cik_str (=f'{cik:010d}')
; accession (20 character format with dashes)
; and accession18 (18 characters of only digits with dashes removed)
; Examples:
; FILING_PATH_FORMAT={accession[11:13]}/{accession}.nc
; Would result in --> FILING_ROOT/95/0001005463-95-000003.nc
; This is useful for accession-only lookups (which is nice because multiple CIKs can file the same accession)
;
; FILING_PATH_FORMAT={cik_str[0:2]}/{cik_str[2:4]}/{cik_str[4:6]}/{cik_str[6:8]}/{cik_str[8:10]}/{accession}.txt
; Would result in --> FILING_ROOT/00/01/00/54/63/0001005463-95-000003.txt
; This uses CIK to break up filings, resulting in < 100 entries per directory. One problem is that multiple CIKs
; can file the same accession, meaning you have to copy the same accession filing to multiple dirs
;
; FILING_PATH_FORMAT={accession[:4]}/{accession[4:7]}/{accession[7:10]}/{accession[11:13]}/{accession[14:17]}/{accession[17:]}/{accession}.nc
; Would result in --> FILING_ROOT/1234/567/890/12/123/456/1234567890-12-123456.nc
; This is useful for accession-only lookups (no CIKs) while also keeping < 1000 entries per directory
;
; The active setting below is the first example (grouped by the two digits at accession[11:13]).
FILING_PATH_FORMAT={accession[11:13]}/{accession}.nc
; Filename format for caching FEED compressed files from EDGAR
; String is passed .format(date=datetime object) of the date of the feed
FEED_CACHE_PATH_FORMAT={date:%Y%m%d}.nc.tar.gz
; Filename format for caching INDEX compressed files from EDGAR
; Available data are: date (datetime object), year, and quarter (both ints)
INDEX_CACHE_PATH_FORMAT=full_index_{year}_Q{quarter}.gz
[Downloader]
; Downloader specific settings
; NOTE(review): KEEP_ALL and KEEP_REGEX are not described in this file; presumably they
; control which downloaded filings are kept (all of them vs. only those matching a regex).
; KEEP_REGEX is intentionally left empty here - confirm the semantics against the consumer.
KEEP_ALL=True
KEEP_REGEX=
; User Agent for downloading, to keep the SEC happy
; The value below is an identifying name followed by a contact e-mail address.
USER_AGENT=company_dns hello@mediumroast.io
[Index]
; Index file settings
; Delimiter used in the index files, written here as the two characters backslash + t.
; NOTE(review): presumably the consumer converts this to a tab character - confirm.
INDEX_DELIMITER=\t
; Index file extension
; Setting INDEX_EXTENSION to tab.gz compresses the index files (this is the value configured below).
; NOTE(review): presumably plain "tab" stores them uncompressed - confirm.
INDEX_EXTENSION=tab.gz