From Fedora Project Wiki
(script to get stats from AppData & AppStream)
 
(Replaced content with "Not used anymore")
 
Line 1: Line 1:
<pre><nowiki>
Not used anymore
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program.  If not, see
# <http://www.gnu.org/licenses/gpl-3.0.html>.
#
# @author Jean-Baptiste Holcroft <jb.holcroft@gmail.com>
# TODO: add keyword statistics
 
import xml.etree.ElementTree as ET
import csv
from datetime import date
 
# Input XML file to analyse and the language that gets its own detailed report.
STATISTIC_FILE = 'fedora-23.xml'
LANGAGE_DETAILED = "fr"

# Sample the date exactly once: calling date.today() separately for each
# component could yield mismatched year/month/day values (and two different
# dates in the two file names) when the script runs across midnight.
_today = date.today()
_date_parts = (_today.year, _today.month, _today.day)

# Output CSV paths, stamped with the (non zero-padded) run date.
RESULT_FILE = './output_file-%s-%s-%s.csv' % _date_parts
RESULT_FILE_LANGAGE = './output_file_%s-%s-%s-%s.csv' \
    % ((LANGAGE_DETAILED,) + _date_parts)

# Namespace URI used to read the xml:lang attribute, plus the prefix map
# handed to ElementTree path queries.
NS_KEY = "http://www.w3.org/XML/1998/namespace"
NS_MAP = {"xml": NS_KEY}

# Child elements of a component that are counted as translatable.
TRANSLATABLE_FIELDS = ["name", "summary", "description"]
# Language codes found in the input file; filled in by the statistics code.
languages = []
# NOTE(review): not referenced in the visible part of the script.
projects_statistics = {}
 
# open global xml file
tree = ET.parse(STATISTIC_FILE)
root = tree.getroot()

#
# GLOBAL STATISTICS
#
# Writes one CSV row per <component>: package name, component type, homepage
# URL, then one column per language holding the fraction of translatable
# fields (see TRANSLATABLE_FIELDS) that carry that language.
print("Make global statistics")

# Collect every language code that appears in an xml:lang attribute anywhere
# in the file.  The codes are sorted so the CSV column order is the same on
# every run; iterating a plain set of strings is not stable between
# interpreter invocations.
languages = sorted({
    element.get("{%s}lang" % NS_KEY)
    for element in root.findall(".//*[@xml:lang]", namespaces=NS_MAP)
})
component_languages = languages.copy()

# Resolve each language to its column once instead of scanning the list for
# every translated field.
language_column = {code: column for column, code in enumerate(languages)}
# Each translated field contributes an equal share, so a component with all
# fields translated into a language scores 1 for it.
field_weight = 1 / len(TRANSLATABLE_FIELDS)

output_for_csv = []
header_line = ["project", "type", "url"] + languages
output_for_csv.append(header_line)

for component in root.findall("component"):
    # A component without <pkgname> gets an empty name instead of aborting
    # the whole run with AttributeError.
    package_name = component.findtext("pkgname", default="")
    package_type = component.get("type")
    package_homepage = ""
    language_statistic = [0] * len(languages)

    # get project url (the last <url type="homepage"> wins)
    for url in component.findall("url"):
        if url.get("type") == "homepage":
            package_homepage = url.text

    # get project statistics; elements without xml:lang are the untranslated
    # originals and are not counted
    for translatable_field in TRANSLATABLE_FIELDS:
        for field in component.findall(translatable_field):
            lang = field.get("{%s}lang" % NS_KEY)
            if lang is not None:
                language_statistic[language_column[lang]] += field_weight

    package_info = [package_name, package_type, package_homepage]
    csv_line = package_info + language_statistic

    output_for_csv.append(csv_line)

with open(RESULT_FILE, 'w', newline='') as csvfile:
    result_file_csv = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    result_file_csv.writerows(output_for_csv)
 
#
# search for one language
#
# Writes one CSV row per <component> for LANGAGE_DETAILED only: package name,
# component type, homepage URL, one column per translatable field ("oui" when
# that field exists in the language, empty otherwise) and the "percentage"
# attribute of the component's matching <lang> element, if any.
print("Make statistics for language %s" % LANGAGE_DETAILED)
output_for_csv = []
header_line = ["project", "type", "url"] + TRANSLATABLE_FIELDS + ["package stats"]
output_for_csv.append(header_line)

for component in root.findall("component"):
    # A component without <pkgname> gets an empty name instead of aborting
    # the whole run with AttributeError.
    package_name = component.findtext("pkgname", default="")
    package_type = component.get("type")
    package_homepage = ""
    language_statistic = [""] * len(TRANSLATABLE_FIELDS)
    embedded_statistic = [""]

    # get project url (the last <url type="homepage"> wins)
    for url in component.findall("url"):
        if url.get("type") == "homepage":
            package_homepage = url.text

    # Flag each translatable field that has an element in the detailed
    # language; enumerate() gives the column directly, without a list search.
    for column, translatable_field in enumerate(TRANSLATABLE_FIELDS):
        for field in component.findall(translatable_field):
            if field.get("{%s}lang" % NS_KEY) == LANGAGE_DETAILED:
                language_statistic[column] = "oui"
                break

    # <lang> elements anywhere below the component whose text is the detailed
    # language; the last match wins.  Presumably this is the translation
    # percentage shipped in the metadata -- TODO confirm.
    for field in component.findall(".//lang"):
        if field.text == LANGAGE_DETAILED:
            embedded_statistic[0] = field.get("percentage")

    package_info = [package_name, package_type, package_homepage]
    csv_line = package_info + language_statistic + embedded_statistic
    output_for_csv.append(csv_line)

with open(RESULT_FILE_LANGAGE, 'w', newline='') as csvfile:
    result_file_csv = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    result_file_csv.writerows(output_for_csv)

print("Done")
</nowiki></pre>

Latest revision as of 15:24, 6 December 2015

Not used anymore