fdroid-server/fdroidserver/scanner.py

455 lines
18 KiB
Python
Raw Normal View History

2016-01-04 16:33:20 +01:00
#!/usr/bin/env python3
#
# scanner.py - part of the FDroid server tools
# Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import imghdr
import json
import os
import re
import sys
import traceback
from argparse import ArgumentParser
import logging
import itertools
from . import _
2016-01-04 17:37:35 +01:00
from . import common
from . import metadata
from .exception import BuildException, VCSException
config = None
options = None
DEFAULT_JSON_PER_BUILD = {'errors': [], 'warnings': [], 'infos': []}
json_per_build = DEFAULT_JSON_PER_BUILD
MAVEN_URL_REGEX = re.compile(r"""\smaven\s*{.*?(?:setUrl|url)\s*=?\s*(?:uri)?\(?\s*["']?([^\s"']+)["']?[^}]*}""",
re.DOTALL)
def get_gradle_compile_commands(build):
compileCommands = ['compile',
'provided',
'apk',
'implementation',
'api',
'compileOnly',
'runtimeOnly']
buildTypes = ['', 'release']
flavors = ['']
if build.gradle and build.gradle != ['yes']:
flavors += build.gradle
commands = [''.join(c) for c in itertools.product(flavors, buildTypes, compileCommands)]
return [re.compile(r'\s*' + c, re.IGNORECASE) for c in commands]
def scan_binary(apkfile):
usual_suspects = {
# The `apkanalyzer dex packages` output looks like this:
# M d 1 1 93 <packagename> <other stuff>
# The first column has P/C/M/F for package, class, methos or field
# The second column has x/k/r/d for removed, kept, referenced and defined.
# We already filter for defined only in the apkanalyzer call. 'r' will be
# for things referenced but not distributed in the apk.
exp: re.compile(r'.[\s]*d[\s]*[0-9]*[\s]*[0-9*][\s]*[0-9]*[\s]*' + exp, re.IGNORECASE) for exp in [
r'(com\.google\.firebase[^\s]*)',
r'(com\.google\.android\.gms[^\s]*)',
r'(com\.google\.tagmanager[^\s]*)',
r'(com\.google\.analytics[^\s]*)',
r'(com\.android\.billing[^\s]*)',
]
}
logging.info("Scanning APK for known non-free classes.")
result = common.SdkToolsPopen(["apkanalyzer", "dex", "packages", "--defined-only", apkfile], output=False)
problems = 0
for suspect, regexp in usual_suspects.items():
matches = regexp.findall(result.output)
if matches:
for m in set(matches):
logging.debug("Found class '%s'" % m)
problems += 1
if problems:
logging.critical("Found problems in %s" % apkfile)
return problems
def scan_source(build_dir, build=metadata.Build()):
2017-04-13 12:30:04 +02:00
"""Scan the source code in the given directory (and all subdirectories)
and return the number of fatal problems encountered
"""
count = 0
# Common known non-free blobs (always lower case):
2015-09-17 02:13:54 +02:00
usual_suspects = {
exp: re.compile(r'.*' + exp, re.IGNORECASE) for exp in [
r'flurryagent',
r'paypal.*mpl',
r'admob.*sdk.*android',
r'google.*ad.*view',
r'google.*admob',
r'google.*play.*services',
r'crittercism',
r'heyzap',
r'jpct.*ae',
r'youtube.*android.*player.*api',
r'bugsense',
r'crashlytics',
r'ouya.*sdk',
r'libspen23',
r'firebase',
r'''["']com.facebook.android['":]''',
2019-02-07 10:26:03 +01:00
r'cloudrail',
r'com.tencent.bugly',
r'appcenter-push',
2015-09-17 02:13:54 +02:00
]
}
whitelisted = [
'firebase-jobdispatcher', # https://github.com/firebase/firebase-jobdispatcher-android/blob/master/LICENSE
'com.firebaseui', # https://github.com/firebase/FirebaseUI-Android/blob/master/LICENSE
'geofire-android' # https://github.com/firebase/geofire-java/blob/master/LICENSE
]
def is_whitelisted(s):
return any(wl in s for wl in whitelisted)
2015-09-17 02:13:54 +02:00
def suspects_found(s):
2016-01-04 17:02:28 +01:00
for n, r in usual_suspects.items():
if r.match(s) and not is_whitelisted(s):
2015-09-17 02:13:54 +02:00
yield n
allowed_repos = [re.compile(r'^https://' + re.escape(repo) + r'/*') for repo in [
2015-10-22 12:22:46 +02:00
'repo1.maven.org/maven2', # mavenCentral()
'jcenter.bintray.com', # jcenter()
'jitpack.io',
'www.jitpack.io',
'repo.maven.apache.org/maven2',
'oss.jfrog.org/artifactory/oss-snapshot-local',
2015-10-22 12:22:46 +02:00
'oss.sonatype.org/content/repositories/snapshots',
'oss.sonatype.org/content/repositories/releases',
'oss.sonatype.org/content/groups/public',
'clojars.org/repo', # Clojure free software libs
's3.amazonaws.com/repo.commonsware.com', # CommonsWare
'plugins.gradle.org/m2', # Gradle plugin repo
2017-05-25 20:55:50 +02:00
'maven.google.com', # Google Maven Repo, https://developer.android.com/studio/build/dependencies.html#google-maven
]
] + [re.compile(r'^file://' + re.escape(repo) + r'/*') for repo in [
'/usr/share/maven-repo', # local repo on Debian installs
]
]
scanignore = common.getpaths_map(build_dir, build.scanignore)
scandelete = common.getpaths_map(build_dir, build.scandelete)
scanignore_worked = set()
scandelete_worked = set()
def toignore(path_in_build_dir):
2016-01-04 17:02:28 +01:00
for k, paths in scanignore.items():
for p in paths:
if path_in_build_dir.startswith(p):
scanignore_worked.add(k)
return True
return False
def todelete(path_in_build_dir):
2016-01-04 17:02:28 +01:00
for k, paths in scandelete.items():
for p in paths:
if path_in_build_dir.startswith(p):
scandelete_worked.add(k)
return True
return False
def ignoreproblem(what, path_in_build_dir):
msg = ('Ignoring %s at %s' % (what, path_in_build_dir))
logging.info(msg)
if json_per_build is not None:
json_per_build['infos'].append([msg, path_in_build_dir])
return 0
def removeproblem(what, path_in_build_dir, filepath):
msg = ('Removing %s at %s' % (what, path_in_build_dir))
logging.info(msg)
if json_per_build is not None:
json_per_build['infos'].append([msg, path_in_build_dir])
os.remove(filepath)
return 0
def warnproblem(what, path_in_build_dir):
if toignore(path_in_build_dir):
return 0
logging.warning('Found %s at %s' % (what, path_in_build_dir))
if json_per_build is not None:
json_per_build['warnings'].append([what, path_in_build_dir])
return 0
def handleproblem(what, path_in_build_dir, filepath):
if toignore(path_in_build_dir):
return ignoreproblem(what, path_in_build_dir)
if todelete(path_in_build_dir):
return removeproblem(what, path_in_build_dir, filepath)
if 'src/test' in filepath or '/test/' in filepath:
return warnproblem(what, path_in_build_dir)
if options and 'json' in vars(options) and options.json:
json_per_build['errors'].append([what, path_in_build_dir])
if options and (options.verbose or not ('json' in vars(options) and options.json)):
logging.error('Found %s at %s' % (what, path_in_build_dir))
return 1
Rewrite scanner logic Initially, the scanner used libmagic which used magic numbers in the file's content to detect what kind of file it appears to be. Since that library isn't available on all systems, we added support for two other libraries, mimetypes amongst them. The issue with mimetypes is that it only uses the file's extension, not its actual content. So this ends in variable behaviour depending on what system you're using fdroidserver on. For example, an executable binary without extension would be ignored if mimetypes was being used. We now drop all libraries - mimetypes too as it depends on the system's mime.types file - and instead check extensions ourselves. On top of that, do a simple binary content check to find binary executables that don't have an extension. The new in-house code without any dependencies doesn't add any new checks, so no builds should break. The current checks still work: % fdroid scanner app.openconnect:1029 [...] Found executable binary at assets/raw/armeabi/curl Found executable binary at assets/raw/mips/curl Found executable binary at assets/raw/x86/curl Found JAR file at lib/XposedBridgeApi-54.jar Found JAR file at libs/acra-4.5.0.jar Found JAR file at libs/openconnect-wrapper.jar Found JAR file at libs/stoken-wrapper.jar Found shared library at libs/armeabi/libopenconnect.so Found shared library at libs/armeabi/libstoken.so Found shared library at libs/mips/libopenconnect.so Found shared library at libs/mips/libstoken.so Found shared library at libs/x86/libopenconnect.so Found shared library at libs/x86/libstoken.so
2015-09-14 07:11:53 +02:00
def is_executable(path):
return os.path.exists(path) and os.access(path, os.X_OK)
textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})
def is_binary(path):
d = None
with open(path, 'rb') as f:
d = f.read(1024)
return bool(d.translate(None, textchars))
# False positives patterns for files that are binary and executable.
safe_paths = [re.compile(r) for r in [
r".*/drawable[^/]*/.*\.png$", # png drawables
r".*/mipmap[^/]*/.*\.png$", # png mipmaps
]
]
def is_image_file(path):
if imghdr.what(path) is not None:
return True
def safe_path(path_in_build_dir):
for sp in safe_paths:
if sp.match(path_in_build_dir):
return True
return False
gradle_compile_commands = get_gradle_compile_commands(build)
def is_used_by_gradle(line):
return any(command.match(line) for command in gradle_compile_commands)
# Iterate through all files in the source code
for root, dirs, files in os.walk(build_dir, topdown=True):
# It's topdown, so checking the basename is enough
for ignoredir in ('.hg', '.git', '.svn', '.bzr'):
if ignoredir in dirs:
dirs.remove(ignoredir)
for curfile in files:
if curfile in ['.DS_Store']:
continue
# Path (relative) to the file
filepath = os.path.join(root, curfile)
if os.path.islink(filepath):
continue
path_in_build_dir = os.path.relpath(filepath, build_dir)
_ignored, ext = common.get_extension(path_in_build_dir)
if curfile in ('gradle-wrapper.jar', 'gradlew', 'gradlew.bat'):
removeproblem(curfile, path_in_build_dir, filepath)
elif ext == 'apk':
removeproblem(_('Android APK file'), path_in_build_dir, filepath)
elif ext == 'a':
count += handleproblem(_('static library'), path_in_build_dir, filepath)
elif ext == 'aar':
count += handleproblem(_('Android AAR library'), path_in_build_dir, filepath)
Rewrite scanner logic Initially, the scanner used libmagic which used magic numbers in the file's content to detect what kind of file it appears to be. Since that library isn't available on all systems, we added support for two other libraries, mimetypes amongst them. The issue with mimetypes is that it only uses the file's extension, not its actual content. So this ends in variable behaviour depending on what system you're using fdroidserver on. For example, an executable binary without extension would be ignored if mimetypes was being used. We now drop all libraries - mimetypes too as it depends on the system's mime.types file - and instead check extensions ourselves. On top of that, do a simple binary content check to find binary executables that don't have an extension. The new in-house code without any dependencies doesn't add any new checks, so no builds should break. The current checks still work: % fdroid scanner app.openconnect:1029 [...] Found executable binary at assets/raw/armeabi/curl Found executable binary at assets/raw/mips/curl Found executable binary at assets/raw/x86/curl Found JAR file at lib/XposedBridgeApi-54.jar Found JAR file at libs/acra-4.5.0.jar Found JAR file at libs/openconnect-wrapper.jar Found JAR file at libs/stoken-wrapper.jar Found shared library at libs/armeabi/libopenconnect.so Found shared library at libs/armeabi/libstoken.so Found shared library at libs/mips/libopenconnect.so Found shared library at libs/mips/libstoken.so Found shared library at libs/x86/libopenconnect.so Found shared library at libs/x86/libstoken.so
2015-09-14 07:11:53 +02:00
elif ext == 'class':
count += handleproblem(_('Java compiled class'), path_in_build_dir, filepath)
elif ext == 'dex':
count += handleproblem(_('Android DEX code'), path_in_build_dir, filepath)
elif ext == 'gz':
count += handleproblem(_('gzip file archive'), path_in_build_dir, filepath)
elif ext == 'so':
count += handleproblem(_('shared library'), path_in_build_dir, filepath)
elif ext == 'zip':
count += handleproblem(_('ZIP file archive'), path_in_build_dir, filepath)
Rewrite scanner logic Initially, the scanner used libmagic which used magic numbers in the file's content to detect what kind of file it appears to be. Since that library isn't available on all systems, we added support for two other libraries, mimetypes amongst them. The issue with mimetypes is that it only uses the file's extension, not its actual content. So this ends in variable behaviour depending on what system you're using fdroidserver on. For example, an executable binary without extension would be ignored if mimetypes was being used. We now drop all libraries - mimetypes too as it depends on the system's mime.types file - and instead check extensions ourselves. On top of that, do a simple binary content check to find binary executables that don't have an extension. The new in-house code without any dependencies doesn't add any new checks, so no builds should break. The current checks still work: % fdroid scanner app.openconnect:1029 [...] Found executable binary at assets/raw/armeabi/curl Found executable binary at assets/raw/mips/curl Found executable binary at assets/raw/x86/curl Found JAR file at lib/XposedBridgeApi-54.jar Found JAR file at libs/acra-4.5.0.jar Found JAR file at libs/openconnect-wrapper.jar Found JAR file at libs/stoken-wrapper.jar Found shared library at libs/armeabi/libopenconnect.so Found shared library at libs/armeabi/libstoken.so Found shared library at libs/mips/libopenconnect.so Found shared library at libs/mips/libstoken.so Found shared library at libs/x86/libopenconnect.so Found shared library at libs/x86/libstoken.so
2015-09-14 07:11:53 +02:00
elif ext == 'jar':
2015-09-17 02:13:54 +02:00
for name in suspects_found(curfile):
count += handleproblem('usual suspect \'%s\'' % name, path_in_build_dir, filepath)
count += handleproblem(_('Java JAR file'), path_in_build_dir, filepath)
2017-09-04 00:57:38 +02:00
Rewrite scanner logic Initially, the scanner used libmagic which used magic numbers in the file's content to detect what kind of file it appears to be. Since that library isn't available on all systems, we added support for two other libraries, mimetypes amongst them. The issue with mimetypes is that it only uses the file's extension, not its actual content. So this ends in variable behaviour depending on what system you're using fdroidserver on. For example, an executable binary without extension would be ignored if mimetypes was being used. We now drop all libraries - mimetypes too as it depends on the system's mime.types file - and instead check extensions ourselves. On top of that, do a simple binary content check to find binary executables that don't have an extension. The new in-house code without any dependencies doesn't add any new checks, so no builds should break. The current checks still work: % fdroid scanner app.openconnect:1029 [...] Found executable binary at assets/raw/armeabi/curl Found executable binary at assets/raw/mips/curl Found executable binary at assets/raw/x86/curl Found JAR file at lib/XposedBridgeApi-54.jar Found JAR file at libs/acra-4.5.0.jar Found JAR file at libs/openconnect-wrapper.jar Found JAR file at libs/stoken-wrapper.jar Found shared library at libs/armeabi/libopenconnect.so Found shared library at libs/armeabi/libstoken.so Found shared library at libs/mips/libopenconnect.so Found shared library at libs/mips/libstoken.so Found shared library at libs/x86/libopenconnect.so Found shared library at libs/x86/libstoken.so
2015-09-14 07:11:53 +02:00
elif ext == 'java':
if not os.path.isfile(filepath):
continue
with open(filepath, 'r', errors='replace') as f:
2016-01-04 17:59:47 +01:00
for line in f:
if 'DexClassLoader' in line:
count += handleproblem('DexClassLoader', path_in_build_dir, filepath)
2016-01-04 17:59:47 +01:00
break
Rewrite scanner logic Initially, the scanner used libmagic which used magic numbers in the file's content to detect what kind of file it appears to be. Since that library isn't available on all systems, we added support for two other libraries, mimetypes amongst them. The issue with mimetypes is that it only uses the file's extension, not its actual content. So this ends in variable behaviour depending on what system you're using fdroidserver on. For example, an executable binary without extension would be ignored if mimetypes was being used. We now drop all libraries - mimetypes too as it depends on the system's mime.types file - and instead check extensions ourselves. On top of that, do a simple binary content check to find binary executables that don't have an extension. The new in-house code without any dependencies doesn't add any new checks, so no builds should break. The current checks still work: % fdroid scanner app.openconnect:1029 [...] Found executable binary at assets/raw/armeabi/curl Found executable binary at assets/raw/mips/curl Found executable binary at assets/raw/x86/curl Found JAR file at lib/XposedBridgeApi-54.jar Found JAR file at libs/acra-4.5.0.jar Found JAR file at libs/openconnect-wrapper.jar Found JAR file at libs/stoken-wrapper.jar Found shared library at libs/armeabi/libopenconnect.so Found shared library at libs/armeabi/libstoken.so Found shared library at libs/mips/libopenconnect.so Found shared library at libs/mips/libstoken.so Found shared library at libs/x86/libopenconnect.so Found shared library at libs/x86/libstoken.so
2015-09-14 07:11:53 +02:00
elif ext == 'gradle':
if not os.path.isfile(filepath):
continue
with open(filepath, 'r', errors='replace') as f:
lines = f.readlines()
for i, line in enumerate(lines):
if is_used_by_gradle(line):
for name in suspects_found(line):
count += handleproblem("usual suspect \'%s\'" % (name),
path_in_build_dir, filepath)
noncomment_lines = [line for line in lines if not common.gradle_comment.match(line)]
no_comments = re.sub(r'/\*.*?\*/', '', ''.join(noncomment_lines), flags=re.DOTALL)
for url in MAVEN_URL_REGEX.findall(no_comments):
if not any(r.match(url) for r in allowed_repos):
count += handleproblem('unknown maven repo \'%s\'' % url, path_in_build_dir, filepath)
elif ext in ['', 'bin', 'out', 'exe']:
if is_binary(filepath):
count += handleproblem('binary', path_in_build_dir, filepath)
elif is_executable(filepath):
if is_binary(filepath) and not (safe_path(path_in_build_dir) or is_image_file(filepath)):
warnproblem(_('executable binary, possibly code'), path_in_build_dir)
Rewrite scanner logic Initially, the scanner used libmagic which used magic numbers in the file's content to detect what kind of file it appears to be. Since that library isn't available on all systems, we added support for two other libraries, mimetypes amongst them. The issue with mimetypes is that it only uses the file's extension, not its actual content. So this ends in variable behaviour depending on what system you're using fdroidserver on. For example, an executable binary without extension would be ignored if mimetypes was being used. We now drop all libraries - mimetypes too as it depends on the system's mime.types file - and instead check extensions ourselves. On top of that, do a simple binary content check to find binary executables that don't have an extension. The new in-house code without any dependencies doesn't add any new checks, so no builds should break. The current checks still work: % fdroid scanner app.openconnect:1029 [...] Found executable binary at assets/raw/armeabi/curl Found executable binary at assets/raw/mips/curl Found executable binary at assets/raw/x86/curl Found JAR file at lib/XposedBridgeApi-54.jar Found JAR file at libs/acra-4.5.0.jar Found JAR file at libs/openconnect-wrapper.jar Found JAR file at libs/stoken-wrapper.jar Found shared library at libs/armeabi/libopenconnect.so Found shared library at libs/armeabi/libstoken.so Found shared library at libs/mips/libopenconnect.so Found shared library at libs/mips/libstoken.so Found shared library at libs/x86/libopenconnect.so Found shared library at libs/x86/libstoken.so
2015-09-14 07:11:53 +02:00
for p in scanignore:
if p not in scanignore_worked:
logging.error(_('Unused scanignore path: %s') % p)
count += 1
for p in scandelete:
if p not in scandelete_worked:
logging.error(_('Unused scandelete path: %s') % p)
count += 1
return count
def main():
global config, options, json_per_build
# Parse command line...
parser = ArgumentParser(usage="%(prog)s [options] [APPID[:VERCODE] [APPID[:VERCODE] ...]]")
2015-09-12 08:42:50 +02:00
common.setup_global_opts(parser)
parser.add_argument("appid", nargs='*', help=_("applicationId with optional versionCode in the form APPID[:VERCODE]"))
parser.add_argument("-f", "--force", action="store_true", default=False,
help=_("Force scan of disabled apps and builds."))
parser.add_argument("--json", action="store_true", default=False,
help=_("Output JSON to stdout."))
metadata.add_metadata_arguments(parser)
options = parser.parse_args()
metadata.warnings_action = options.W
json_output = dict()
if options.json:
if options.verbose:
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
else:
logging.getLogger().setLevel(logging.ERROR)
config = common.read_config(options)
# Read all app and srclib metadata
2013-12-19 23:06:57 +01:00
allapps = metadata.read_metadata()
apps = common.read_app_args(options.appid, allapps, True)
probcount = 0
2013-10-29 13:23:42 +01:00
build_dir = 'build'
if not os.path.isdir(build_dir):
logging.info("Creating build directory")
2013-10-29 13:23:42 +01:00
os.makedirs(build_dir)
srclib_dir = os.path.join(build_dir, 'srclib')
extlib_dir = os.path.join(build_dir, 'extlib')
2016-01-04 17:02:28 +01:00
for appid, app in apps.items():
json_per_appid = dict()
if app.Disabled and not options.force:
logging.info(_("Skipping {appid}: disabled").format(appid=appid))
json_per_appid['disabled'] = json_per_build['infos'].append('Skipping: disabled')
2013-12-19 23:06:57 +01:00
continue
2013-12-19 23:06:57 +01:00
try:
if app.RepoType == 'srclib':
build_dir = os.path.join('build', 'srclib', app.Repo)
else:
build_dir = os.path.join('build', appid)
if app.builds:
logging.info(_("Processing {appid}").format(appid=appid))
# Set up vcs interface and make sure we have the latest code...
vcs = common.getvcs(app.RepoType, app.Repo, build_dir)
else:
logging.info(_("{appid}: no builds specified, running on current source state")
.format(appid=appid))
json_per_build = DEFAULT_JSON_PER_BUILD
json_per_appid['current-source-state'] = json_per_build
count = scan_source(build_dir)
if count > 0:
logging.warning(_('Scanner found {count} problems in {appid}:')
.format(count=count, appid=appid))
probcount += count
app.builds = []
for build in app.builds:
json_per_build = DEFAULT_JSON_PER_BUILD
json_per_appid[build.versionCode] = json_per_build
if build.disable and not options.force:
logging.info("...skipping version %s - %s" % (
build.versionName, build.get('disable', build.commit[1:])))
continue
logging.info("...scanning version " + build.versionName)
# Prepare the source code...
common.prepare_source(vcs, app, build,
build_dir, srclib_dir,
extlib_dir, False)
count = scan_source(build_dir, build)
if count > 0:
logging.warning(_('Scanner found {count} problems in {appid}:{versionCode}:')
.format(count=count, appid=appid, versionCode=build.versionCode))
probcount += count
2013-12-19 23:06:57 +01:00
except BuildException as be:
logging.warning('Could not scan app %s due to BuildException: %s' % (
appid, be))
probcount += 1
2013-12-19 23:06:57 +01:00
except VCSException as vcse:
logging.warning('VCS error while scanning app %s: %s' % (appid, vcse))
probcount += 1
2013-12-19 23:06:57 +01:00
except Exception:
logging.warning('Could not scan app %s due to unknown error: %s' % (
appid, traceback.format_exc()))
probcount += 1
for k, v in json_per_appid.items():
if len(v['errors']) or len(v['warnings']) or len(v['infos']):
json_output[appid] = json_per_appid
break
logging.info(_("Finished"))
if options.json:
print(json.dumps(json_output))
else:
print(_("%d problems found") % probcount)
if __name__ == "__main__":
main()