-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreateVersionsFile.py
More file actions
123 lines (105 loc) · 3.99 KB
/
createVersionsFile.py
File metadata and controls
123 lines (105 loc) · 3.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import requests
import time
import re
import os
from dotenv import load_dotenv
# Load environment variables via python-dotenv: the default lookup first, then
# the two project-specific files named below.
load_dotenv()
load_dotenv("filePaths.env")
load_dotenv("config.env")
# Settings read from the environment; os.getenv returns None for unset names.
GITHUB_ACCESS_TOKEN = os.getenv('GITHUB-ACCESS-TOKEN')
# Path of the file read by readURLsFromInputFile (one project URL per line).
INPUT_FILE = os.getenv('FILTERED-PROJECTS-LIST-FILE')
# Path of the file written by saveVersionURLsToOutputIfValid.
OUTPUT_FILE = os.getenv('PROJECTS-VERSIONS-FILE')
# Optional pagination bounds. False means "not configured"; when configured
# the values arrive as strings, not integers.
PAGINATION_OFFSET = os.getenv('VERSIONS-FILE-OFFSET', False)
PAGINATION_LIMIT = os.getenv('VERSIONS-FILE-LIMIT', False)
# Single HTTP session shared by getVersions and getTags.
s = requests.Session()
# NOTE(review): when GITHUB-ACCESS-TOKEN is unset, str(None) makes this header
# the literal "token None" -- confirm that is acceptable for the API calls.
headers = {'Authorization': 'token ' + str(GITHUB_ACCESS_TOKEN)}
# Accumulates "<repository URL>;<version>" strings appended by addVersionURL
# and later written out by saveVersionURLsToOutputIfValid.
URLsToSave = []
def readURLsFromInputFile():
    """Read project URLs from INPUT_FILE and reduce each to "owner/repo/".

    Each non-blank line is split on "/" and its last two path segments are
    kept, so "https://github.com/owner/repo" becomes "owner/repo/\\n".
    A trailing "/" on the URL is ignored, blank lines are skipped, and lines
    with fewer than two segments are skipped with a warning instead of
    raising IndexError.

    Returns:
        list[str]: one "owner/repo/\\n" entry per usable input line, in file
        order. The trailing slash is kept because callers append the API
        endpoint name directly to it.
    """
    links = []
    # The context manager closes the file even if reading fails part-way.
    with open(INPUT_FILE) as sourceFile:
        for line in sourceFile:
            cleaned = line.strip()
            if not cleaned:
                continue
            # Drop trailing slashes so ".../owner/repo/" does not yield an
            # empty last segment.
            parts = cleaned.rstrip("/").split("/")
            if len(parts) < 2:
                print("WARNING: line was ignored since it is not a project URL: " + cleaned)
                continue
            links.append(f"{parts[-2]}/{parts[-1]}/\n")
    return links
def checkRequestOffsetReached(currentPaginationIndex):
    """Tell whether the current project index is at or past the start offset.

    Args:
        currentPaginationIndex: zero-based index of the project being visited.

    Returns:
        bool: True when no offset is configured (PAGINATION_OFFSET is False
        or blank) or when the index is greater than or equal to the offset.

    Raises:
        ValueError: if PAGINATION_OFFSET is set to a non-integer value.
    """
    if PAGINATION_OFFSET in (False, None) or not str(PAGINATION_OFFSET).strip():
        return True
    try:
        offset = int(PAGINATION_OFFSET)
    except (TypeError, ValueError):
        raise ValueError(
            f"VERSIONS-FILE-OFFSET must be an integer, got {PAGINATION_OFFSET!r}"
        ) from None
    # Compare as integers: as strings, "10" <= "9" is True (lexicographic),
    # which let projects before the offset through.
    return offset <= int(currentPaginationIndex)
def getVersions(repo):
    """Fetch a repository's releases and return two of their tag names.

    Performs one GET on https://api.github.com/repos/<repo>releases using the
    shared session and headers, then reads "tag_name" from the first and last
    items of the JSON response. If the response cannot be used that way
    (not JSON, empty list, error payload, missing key) the lookup falls back
    to getTags(repo).

    Args:
        repo: "owner/name/" path fragment. It must end with "/" because the
            endpoint name is appended to it directly.

    Returns:
        list: [tag of the last response item, tag of the first response item],
        or whatever getTags(repo) returns on fallback.
    """
    # NOTE(review): only this single response is inspected; if the API
    # paginates, the "last" item is the last one on the first page -- confirm.
    response = s.get(f"https://api.github.com/repos/{repo}releases", headers=headers)
    try:
        result = response.json()
        projectFirstVersion = result[0]["tag_name"]
        projectLastVersion = result[-1]["tag_name"]
    except (ValueError, LookupError, TypeError):
        # ValueError: body is not JSON; LookupError: empty list or a dict
        # payload without the expected entries; TypeError: unexpected shape.
        # A bare except would also have swallowed KeyboardInterrupt.
        return getTags(repo)
    return [projectLastVersion, projectFirstVersion]
def getTags(repo):
    """Fetch a repository's tags and return two of their names.

    Performs one GET on https://api.github.com/repos/<repo>tags using the
    shared session and headers, then reads "name" from the first and last
    items of the JSON response.

    Args:
        repo: "owner/name/" path fragment. It must end with "/" because the
            endpoint name is appended to it directly.

    Returns:
        list: [name of the last response item, name of the first response
        item], or ["None", "None"] (strings) when the response is not JSON,
        is empty, or does not have the expected shape.
    """
    response = s.get(f"https://api.github.com/repos/{repo}tags", headers=headers)
    try:
        result = response.json()
        projectFirstVersion = result[0]["name"]
        projectLastVersion = result[-1]["name"]
    except (ValueError, LookupError, TypeError):
        # Narrowed from a bare except so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return ["None", "None"]
    return [projectLastVersion, projectFirstVersion]
def createRepoVersionURLString(strippedLine):
    """Turn an "owner/repo/" fragment into its https://github.com/ URL.

    Any "/releases" occurrence is removed from the fragment first; whitespace
    directly around it is kept.

    Args:
        strippedLine: repository path fragment, e.g. "owner/repo/".

    Returns:
        str: "https://github.com/" followed by the cleaned fragment.
    """
    # Raw strings: "\s" in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    patt = re.compile(r'(\s*)/releases(\s*)')
    link = patt.sub(r'\1\2', strippedLine)
    return f"https://github.com/{link}"
def addVersionURL(baseURL, version):
    """Queue the "<baseURL>;<version>" entry for later writing.

    The entry is built by createURLWithVersion and appended to the
    module-level URLsToSave list.
    """
    # Appending mutates the existing list, so no ``global`` statement is needed.
    URLsToSave.append(createURLWithVersion(baseURL, version))
def createURLWithVersion(baseURL, version):
    """Join a repository URL and a version into one "<url>;<version>" string."""
    return "{};{}".format(baseURL, version)
def saveVersionURLsToOutputIfValid():
    """Write every valid, not-yet-written entry of URLsToSave to OUTPUT_FILE.

    The output file is truncated, then each entry that passes
    validateURLVersionFormat is written on its own line. An entry that was
    already written during this call is skipped, so the file contains no
    duplicate lines (for example when a project's two queued versions are
    identical).
    """
    # Track what has been written. Testing membership in URLsToSave itself is
    # always true for entries taken from that list, so it filters nothing.
    alreadyWritten = set()
    # The context manager closes the file even if validation or writing fails.
    with open(OUTPUT_FILE, 'w+') as outputFile:
        for URL in URLsToSave:
            if URL in alreadyWritten:
                continue
            if not validateURLVersionFormat(URL):
                continue
            alreadyWritten.add(URL)
            outputFile.write(URL + "\n")
            # NOTE(review): one-second pause per written line kept from the
            # original; its purpose is not evident here -- confirm it is needed.
            time.sleep(1)
def validateURLVersionFormat(url):
    """Check that the text after ";" in *url* looks like a version.

    Two patterns are tried against the part following a ";":
      * any text that ends in a digit-based version, or the literal "None";
      * an optional "v", then digits, then any trailing text, or "None".
    The entry is accepted when either pattern matches. A warning is printed
    when neither does.

    Args:
        url: a "<repository URL>;<version>" string.

    Returns:
        The first successful ``re.Match`` (truthy), or None when the version
        part matches neither pattern.
    """
    print("VALIDATING: " + url)
    # Raw strings: "\d" and "\*" are invalid escapes in normal string literals.
    regexTextAtBeginning = r";(.*(((\d+).?(\d+.)*(\*|\d+)?))|None)$"
    regexTextAtEnd = r";(v?(((\d+).?(\d+.)*(\*|\d+)?).*)|None)$"
    matchAtBeginning = re.search(regexTextAtBeginning, url)
    matchAtEnd = re.search(regexTextAtEnd, url)
    # re.search returns None on failure, never False; comparing with
    # "== False" meant this warning could never be printed.
    if matchAtBeginning is None and matchAtEnd is None:
        print("WARNING: URL was ignored since it has not valid version: " + url)
    return matchAtBeginning or matchAtEnd
def validateURLIsNotDuplicated(url):
    """Report whether *url* is present in the module-level URLsToSave list.

    NOTE(review): despite the name, this is a plain membership test, so it is
    True for any entry taken from URLsToSave and does not detect repeated
    entries by itself.
    """
    isQueued = url in URLsToSave
    return isQueued
def checkIfProjectsNumberReachedLimit(currentPaginationIndex):
    """Tell whether the current project index equals the configured limit.

    Args:
        currentPaginationIndex: zero-based index of the project just handled.

    Returns:
        bool: False when no limit is configured (PAGINATION_LIMIT is False or
        blank); otherwise True exactly when the index equals the limit.

    Raises:
        ValueError: if PAGINATION_LIMIT is set to a non-integer value.
    """
    if PAGINATION_LIMIT in (False, None) or not str(PAGINATION_LIMIT).strip():
        return False
    try:
        limit = int(PAGINATION_LIMIT)
    except (TypeError, ValueError):
        raise ValueError(
            f"VERSIONS-FILE-LIMIT must be an integer, got {PAGINATION_LIMIT!r}"
        ) from None
    # Compare as integers so values such as " 5" or "05" still match index 5;
    # as strings they never did and the limit was silently ignored.
    return int(currentPaginationIndex) == limit
def main():
    """Collect two version entries per project and write them to OUTPUT_FILE.

    Projects are read with readURLsFromInputFile. Those before the configured
    offset are skipped; for every other project the two versions returned by
    getVersions are queued with addVersionURL, and the loop stops once
    checkIfProjectsNumberReachedLimit reports the limit. The queued entries
    are then written by saveVersionURLsToOutputIfValid.
    """
    print("INFO: Fetching first and last versions of each project...")
    # enumerate supplies the same index the manual counter produced: it
    # advances once per input line, whether the line is skipped or processed.
    for currentPaginationIndex, line in enumerate(readURLsFromInputFile()):
        print(line)
        if not checkRequestOffsetReached(currentPaginationIndex):
            continue
        repoPath = line.strip()
        lastVersion, firstVersion = getVersions(repoPath)
        repoURL = createRepoVersionURLString(repoPath)
        # Queue the second element of the pair first, then the first element.
        addVersionURL(repoURL, firstVersion)
        addVersionURL(repoURL, lastVersion)
        if checkIfProjectsNumberReachedLimit(currentPaginationIndex):
            break
    saveVersionURLsToOutputIfValid()
    print("Projects' versions were saved in file: " + OUTPUT_FILE)


# Run the collection only when the file is executed as a script.
if __name__ == "__main__":
    main()