forked from cesarbruschetta/doi_request-experiments
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_doi_not_found.py
More file actions
32 lines (24 loc) · 857 Bytes
/
generate_doi_not_found.py
File metadata and controls
32 lines (24 loc) · 857 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""
Script to check whether the given DOIs are still returning a 404 error.
"""
import pandas as pd
import requests
from tqdm import tqdm
from requests.exceptions import HTTPError
def main(
    input_path: str = "./SciELO_Brazil_DOI.csv",
    output_path: str = "./df_doi_not_found.csv",
    timeout: float = 30.0,
) -> None:
    """Check every DOI listed in a CSV file and save the ones that answer 404.

    The input CSV is read with ``;`` as the delimiter and must contain a
    ``doi`` column. Each DOI is requested at
    ``https://www.doi.org/doi/<doi>``. Rows whose request ends in an HTTP 404
    are collected, with the error text stored in an extra ``request`` column,
    and written to ``output_path``.

    Requests that fail for reasons other than an HTTP error status
    (connection errors, timeouts, redirect loops) are reported on the
    console and skipped, so a single unreachable host does not abort the
    whole run.

    Args:
        input_path: CSV file with the DOIs to check.
        output_path: CSV file that receives the rows whose DOI returned 404.
        timeout: Seconds to wait for each HTTP request before giving up.
    """
    doi_files = pd.read_csv(input_path, delimiter=";", low_memory=False)
    doi_not_found = []

    # A single session lets connections be reused across the many requests.
    with requests.Session() as session:
        for _, row in tqdm(doi_files.iterrows(), total=len(doi_files)):
            doi = row["doi"]
            try:
                # Without a timeout one stalled server would hang the run.
                response = session.get(
                    f"https://www.doi.org/doi/{doi}", timeout=timeout
                )
                response.raise_for_status()
            except HTTPError as exc:
                row["request"] = str(exc)
                # Use the response attached to the exception rather than a
                # variable from the try body, which may not be bound.
                failed = exc.response
                if failed is not None and failed.status_code == 404:
                    doi_not_found.append(row)
            except requests.exceptions.RequestException as exc:
                # Network-level failure: not a 404, so the row is not
                # recorded; report it and keep processing the other DOIs.
                tqdm.write(f"{doi}: {exc}")

    df_doi_not_found = pd.DataFrame(doi_not_found)
    # Save the results for future reference.
    df_doi_not_found.to_csv(output_path)
# Script entry point: run the check only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()