cansa committed on
Commit
dbc6e65
·
verified ·
1 Parent(s): de4a757

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sys
3
+ import requests
4
+ import zipfile
5
+ import io
6
+ import ast
7
+
8
+
9
def is_file_type(file_path, file_extension):
    """Return True when ``file_path`` ends with ``file_extension``.

    The comparison is a plain, case-sensitive suffix match on the path string.
    """
    matches_extension = file_path.endswith(file_extension)
    return matches_extension
12
+
13
+
14
def is_likely_useful_file(file_path, lang="python"):
    """Heuristically decide whether ``file_path`` is worth including.

    A path is rejected when any of the following holds:

    * one of its ``/``-separated components starts with a dot,
    * it contains the substring ``test`` (case-insensitive),
    * it lies inside one of the excluded directories,
    * it contains one of the config / tooling / documentation name fragments.

    ``lang`` ("python" or "go") adds language-specific exclusions; any other
    value applies only the common rules.
    """
    skipped_dirs = ("docs", "examples", "tests", "test", "scripts", "utils", "benchmarks")
    skipped_fragments = (".github", ".gitignore", "LICENSE")

    if lang == "python":
        skipped_dirs += ("__pycache__",)
        skipped_fragments += ("hubconf.py", "setup.py", "stale.py", "gen-card-", "write_model_card")
    elif lang == "go":
        skipped_dirs += ("vendor",)
        skipped_fragments += ("go.mod", "go.sum", "Makefile")

    # Hidden files and hidden directories anywhere along the path.
    if any(segment.startswith(".") for segment in file_path.split("/")):
        return False

    # Anything that mentions "test" in its path, regardless of case.
    if "test" in file_path.lower():
        return False

    # Excluded directory either nested in the path or leading it.
    in_skipped_dir = any(
        f"/{directory}/" in file_path or file_path.startswith(directory + "/")
        for directory in skipped_dirs
    )
    if in_skipped_dir:
        return False

    return not any(fragment in file_path for fragment in skipped_fragments)
42
+
43
+
44
def is_test_file(file_content, lang):
    """Determine if the file content suggests it is a test file.

    For Python the source is parsed and every ``import`` / ``from ... import``
    statement is inspected; the file counts as a test file when the top-level
    package of any imported module is ``unittest`` or ``pytest``. Submodule
    imports such as ``import unittest.mock`` or
    ``from unittest.mock import patch`` are therefore detected as well.

    Only Python content is analysed; for every other ``lang`` (including
    "go", whose indicator list is currently unused) the function returns False.

    Args:
        file_content: Source text of the file.
        lang: Language identifier, e.g. "python" or "go".

    Returns:
        True if a test-framework import is found, otherwise False. Source that
        cannot be parsed is treated as "not a test file".
    """
    test_indicators = {"python": ["unittest", "pytest"], "go": ["testing"]}.get(lang, [])

    if lang != "python":
        return False

    try:
        module = ast.parse(file_content)
    except (SyntaxError, ValueError):
        # ValueError: older interpreters reject NUL bytes with ValueError
        # instead of SyntaxError; either way the file cannot be analysed.
        return False

    for node in ast.walk(module):
        if isinstance(node, ast.Import):
            imported = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom):
            # ``from . import x`` has no module name at all.
            imported = [node.module] if node.module else []
        else:
            continue
        # Compare only the top-level package so dotted submodules match too.
        if any(name.split(".")[0] in test_indicators for name in imported):
            return True

    return False
63
+
64
+
65
def has_sufficient_content(file_content, min_line_count=10):
    """Tell whether the file holds at least ``min_line_count`` substantive lines.

    A line is substantive when, after stripping whitespace, it is non-empty
    and does not begin with ``#`` or ``//``.
    """
    substantive = 0
    for raw_line in file_content.split("\n"):
        text = raw_line.strip()
        if not text or text.startswith(("#", "//")):
            continue  # blank line or single-line comment
        substantive += 1
    return substantive >= min_line_count
69
+
70
+
71
def remove_comments_and_docstrings(source):
    """Remove comments and docstrings from the Python source code.

    The source is parsed into an AST and unparsed again, which drops all ``#``
    comments. Docstrings of functions, async functions and classes are removed
    from their bodies; any other bare string-literal statement has its text
    replaced by an empty string.

    Args:
        source: Python source text.

    Returns:
        The regenerated source text.

    Raises:
        SyntaxError: If ``source`` is not valid Python.
    """
    tree = ast.parse(source)
    for node in ast.walk(tree):
        if (
            isinstance(node, (ast.FunctionDef, ast.ClassDef, ast.AsyncFunctionDef))
            and ast.get_docstring(node, clean=False) is not None
        ):
            # Drop the docstring; keep the body non-empty so the regenerated
            # code is still syntactically valid.
            node.body = node.body[1:] or [ast.Pass()]
        elif (
            isinstance(node, ast.Expr)
            and isinstance(node.value, ast.Constant)
            and isinstance(node.value.value, str)
        ):
            # ast.Str / ``.s`` were removed in Python 3.12; string literals are
            # ast.Constant nodes carrying a ``str`` value.
            node.value.value = ""
    return ast.unparse(tree)
80
+
81
+
82
def download_repo(repo_url, branch_or_tag="master"):
    """Download and process files from a GitHub repository.

    The repository archive is fetched as a zip file, every ``.py`` member that
    passes ``is_likely_useful_file`` and is not recognised as a test file by
    ``is_test_file`` is decoded, and all accepted files are concatenated into
    one string, each preceded by a ``# File: <path>`` header line.

    Args:
        repo_url: Base URL of the repository; surrounding whitespace and a
            trailing slash are ignored.
        branch_or_tag: Branch name or tag name. The branch archive URL is tried
            first, then the tag archive URL.

    Returns:
        The concatenated contents of all accepted files.

    Raises:
        SystemExit: If neither archive URL answers with HTTP 200.
    """
    lang = "python"
    base_url = repo_url.strip().rstrip("/")

    # Branches are served from refs/heads and tags from refs/tags; try the
    # branch archive first and fall back to the tag archive.
    candidate_urls = (
        f"{base_url}/archive/refs/heads/{branch_or_tag}.zip",
        f"{base_url}/archive/refs/tags/{branch_or_tag}.zip",
    )

    response = None
    for download_url in candidate_urls:
        print(download_url)
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(download_url, timeout=60)
        if response.status_code == 200:
            break

    if response.status_code != 200:
        print(f"Failed to download the repository. Status code: {response.status_code}")
        # NOTE(review): sys.exit() raises SystemExit inside whatever process is
        # serving this call; kept for compatibility, but consider raising a
        # regular exception instead.
        sys.exit(1)

    header_prefix = "//" if lang == "go" else "#"
    sections = []
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        member_names = zip_file.namelist()
        print(member_names)
        for file_path in member_names:
            # Skip directories, non-language files, less likely useful files, hidden directories, and test files
            if (
                file_path.endswith("/")
                or not is_file_type(file_path, ".py")
                or not is_likely_useful_file(file_path, lang)
            ):
                print("Dir or non-lang or useless:", file_path)
                continue

            # Undecodable bytes are replaced so that a single oddly-encoded
            # file does not abort the whole run.
            file_content = zip_file.read(file_path).decode("utf-8", errors="replace")

            # Skip test files based on content
            if is_test_file(file_content, lang):
                print("Test file:", file_path)
                continue
            print("Appending", file_path)

            sections.append(f"{header_prefix} File: {file_path}\n{file_content}\n\n")

    return "".join(sections)
114
+
115
+
116
def download_and_process(repo_url, branch_or_tag="master"):
    """Fetch the repository via ``download_repo`` and return its combined text."""
    return download_repo(repo_url, branch_or_tag)
119
+
120
+
121
# Build the UI components in the same order as before: the two inputs first,
# then the output widget.
_repo_url_box = gr.components.Textbox(
    label="GitHub Repository URL",
    value="https://github.com/cognitivecomputations/github2file",
)
_branch_box = gr.components.Textbox(label="Branch or Tag", value="master")
_output_view = gr.components.Code(
    label="Output File",
    language="python",
    interactive=True,
)

# Two text inputs are passed to download_and_process; its return value is
# shown in the code widget.
iface = gr.Interface(
    fn=download_and_process,
    inputs=[_repo_url_box, _branch_box],
    outputs=_output_view,
)

if __name__ == "__main__":
    iface.launch()