use partial androguard binary XML parsing to speed up APK ID lookup

Normally, androguard parses the entire APK before any values can be read
from it.  This change uses androguard primitives to parse only the
AndroidManifest.xml and to stop as soon as it has the values it needs.
This greatly speeds up the parsing (1 minute instead of 60 minutes).

fdroid/fdroidserver#557
This commit is contained in:
Hans-Christoph Steiner 2018-09-18 15:29:24 +02:00
parent fa09337b4b
commit a3cecc16a3
2 changed files with 69 additions and 12 deletions

View File

@ -2096,26 +2096,83 @@ def is_apk_and_debuggable(apkfile):
def get_apk_id(apkfile):
"""Extract identification information from APK using aapt.
"""Extract identification information from APK.
Androguard is preferred since it is more reliable and a lot
faster. Occasionally, when androguard can't get the info from the
APK, aapt still can. So aapt is also used as the final fallback
method.
:param apkfile: path to an APK file.
:returns: triplet (appid, version code, version name)
"""
if use_androguard():
return get_apk_id_androguard(apkfile)
try:
return get_apk_id_androguard(apkfile)
except zipfile.BadZipFile as e:
logging.error(apkfile + ': ' + str(e))
if 'aapt' in config:
return get_apk_id_aapt(apkfile)
else:
return get_apk_id_aapt(apkfile)
def get_apk_id_androguard(apkfile):
"""Read (appid, versionCode, versionName) from an APK
This first tries to do quick binary XML parsing to just get the
values that are needed. It will fallback to full androguard
parsing, which is slow, if it can't find the versionName value or
versionName is set to a Android String Resource (e.g. an integer
hex value that starts with @).
"""
if not os.path.exists(apkfile):
raise FDroidException(_("Reading packageName/versionCode/versionName failed, APK invalid: '{apkfilename}'")
.format(apkfilename=apkfile))
a = _get_androguard_APK(apkfile)
versionName = ensure_final_value(a.package, a.get_android_resources(), a.get_androidversion_name())
from androguard.core.bytecodes.axml import AXMLParser, format_value, START_TAG, END_TAG, TEXT, END_DOCUMENT
appid = None
versionCode = None
versionName = None
with zipfile.ZipFile(apkfile) as apk:
with apk.open('AndroidManifest.xml') as manifest:
axml = AXMLParser(manifest.read())
count = 0
while axml.is_valid():
_type = next(axml)
count += 1
if _type == START_TAG:
for i in range(0, axml.getAttributeCount()):
name = axml.getAttributeName(i)
_type = axml.getAttributeValueType(i)
_data = axml.getAttributeValueData(i)
value = format_value(_type, _data, lambda _: axml.getAttributeValue(i))
if appid is None and name == 'package':
appid = value
elif versionCode is None and name == 'versionCode':
if value.startswith('0x'):
versionCode = str(int(value, 16))
else:
versionCode = value
elif versionName is None and name == 'versionName':
versionName = value
if axml.getName() == 'manifest':
break
elif _type == END_TAG or _type == TEXT or _type == END_DOCUMENT:
raise RuntimeError('{path}: <manifest> must be the first element in AndroidManifest.xml'
.format(path=apkfile))
if not versionName or versionName[0] == '@':
a = _get_androguard_APK(apkfile)
versionName = ensure_final_value(a.package, a.get_android_resources(), a.get_androidversion_name())
if not versionName:
versionName = '' # versionName is expected to always be a str
return a.package, a.get_androidversion_code(), versionName
return appid, versionCode, versionName.strip('\0')
def get_apk_id_aapt(apkfile):

View File

@ -611,14 +611,14 @@ class CommonTest(unittest.TestCase):
for apkfilename, appid, versionCode, versionName in testcases:
if 'aapt' in config:
a, vc, vn = fdroidserver.common.get_apk_id_aapt(apkfilename)
self.assertEqual(appid, a)
self.assertEqual(versionCode, vc)
self.assertEqual(versionName, vn)
self.assertEqual(appid, a, 'aapt appid parsing failed for ' + apkfilename)
self.assertEqual(versionCode, vc, 'aapt versionCode parsing failed for ' + apkfilename)
self.assertEqual(versionName, vn, 'aapt versionName parsing failed for ' + apkfilename)
if fdroidserver.common.use_androguard():
a, vc, vn = fdroidserver.common.get_apk_id_androguard(apkfilename)
self.assertEqual(appid, a)
self.assertEqual(versionCode, vc)
self.assertEqual(versionName, vn)
a, vc, vn = fdroidserver.common.get_apk_id(apkfilename)
self.assertEqual(appid, a, 'androguard appid parsing failed for ' + apkfilename)
self.assertEqual(versionName, vn, 'androguard versionName parsing failed for ' + apkfilename)
self.assertEqual(versionCode, vc, 'androguard versionCode parsing failed for ' + apkfilename)
with self.assertRaises(FDroidException):
fdroidserver.common.get_apk_id('nope')